This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b3a54b4af4c Improve show column stats performance. (#31298) b3a54b4af4c is described below commit b3a54b4af4c355a0b15c7f2a419a65cb60fc7324 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri Feb 23 10:44:13 2024 +0800 Improve show column stats performance. (#31298) --- .../apache/doris/analysis/ShowColumnStatsStmt.java | 4 +++ .../java/org/apache/doris/qe/ShowExecutor.java | 32 +++++++++++++++-- .../doris/statistics/StatisticsRepository.java | 13 +++++++ .../suites/statistics/test_analyze_mv.groovy | 42 ++++++++++++++++++++-- 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index f858263b9df..37be76b20df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -180,4 +180,8 @@ public class ShowColumnStatsStmt extends ShowStmt { public boolean isCached() { return cached; } + + public boolean isAllColumns() { + return columnNames == null; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index c39288e3919..62e9f4ac5a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -197,6 +197,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; +import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.StatisticsRepository; import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.query.QueryStatsUtil; @@ -2553,7 +2554,35 @@ public class ShowExecutor { Set<String> columnNames = showColumnStatsStmt.getColumnNames(); PartitionNames partitionNames = showColumnStatsStmt.getPartitionNames(); boolean showCache = showColumnStatsStmt.isCached(); + boolean isAllColumns = showColumnStatsStmt.isAllColumns(); + if (isAllColumns && !showCache && partitionNames == null) { + getStatsForAllColumns(columnStatistics, tableIf); + } else { + getStatsForSpecifiedColumns(columnStatistics, columnNames, tableIf, showCache, tableName, partitionNames); + } + resultSet = showColumnStatsStmt.constructResultSet(columnStatistics); + } + + private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics, + TableIf tableIf) throws AnalysisException { + List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId()); + for (ResultRow row : resultRows) { + String indexName = "N/A"; + long indexId = Long.parseLong(row.get(4)); + if (indexId != -1) { + indexName = ((OlapTable) tableIf).getIndexNameById(indexId); + if (indexName == null) { + continue; + } + } + columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), ColumnStatistic.fromResultRow(row))); + } + } + private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics, + Set<String> columnNames, TableIf tableIf, boolean showCache, + TableName tableName, PartitionNames partitionNames) + throws AnalysisException { for (String colName : columnNames) { // Olap base index use -1 as index id. List<Long> indexIds = Lists.newArrayList(); @@ -2584,13 +2613,12 @@ public class ShowExecutor { } else { String finalIndexName = indexName; columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName, - colName, showColumnStatsStmt.getPartitionNames().getPartitionNames()) + colName, partitionNames.getPartitionNames()) .stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s)) .collect(Collectors.toList())); } } } - resultSet = showColumnStatsStmt.constructResultSet(columnStatistics); } public void handleShowColumnHist() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index c8db624eeb3..5ac9b7305c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -102,6 +102,11 @@ public class StatisticsRepository { + " ${inPredicate}" + " AND part_id IS NOT NULL"; + private static final String FETCH_TABLE_STATISTICS = "SELECT * FROM " + + FeConstants.INTERNAL_DB_NAME + "." + StatisticConstants.STATISTIC_TBL_NAME + + " WHERE tbl_id = ${tblId}" + + " AND part_id IS NULL"; + public static ColumnStatistic queryColumnStatisticsByName(long tableId, long indexId, String colName) { ResultRow resultRow = queryColumnStatisticById(tableId, indexId, colName); if (resultRow == null) { @@ -126,6 +131,14 @@ public class StatisticsRepository { Collectors.toList()); } + public static List<ResultRow> queryColumnStatisticsForTable(long tableId) + throws AnalysisException { + Map<String, String> params = new HashMap<>(); + params.put("tblId", String.valueOf(tableId)); + List<ResultRow> rows = StatisticsUtil.executeQuery(FETCH_TABLE_STATISTICS, params); + return rows == null ? Collections.emptyList() : rows; + } + public static ResultRow queryColumnStatisticById(long tblId, long indexId, String colName) { return queryColumnStatisticById(tblId, indexId, colName, false); } diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 34c5736035e..da8df344a68 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -68,6 +68,25 @@ suite("test_analyze_mv") { } } + def verify_column_stats = { all_column_result, one_column_result -> + logger.info("all column result: " + all_column_result) + logger.info("one column result: " + one_column_result) + boolean found = false; + for (int i = 0; i < all_column_result.size(); i++) { + if (all_column_result[i][0] == one_column_result[0] && all_column_result[i][1] == one_column_result[1]) { + assertEquals(all_column_result[i][2], one_column_result[2]) + assertEquals(all_column_result[i][3], one_column_result[3]) + assertEquals(all_column_result[i][4], one_column_result[4]) + assertEquals(all_column_result[i][5], one_column_result[5]) + assertEquals(all_column_result[i][6], one_column_result[6]) + assertEquals(all_column_result[i][7], one_column_result[7]) + assertEquals(all_column_result[i][8], one_column_result[8]) + found = true; + } + } + assertTrue(found) + } + sql """drop database if exists test_analyze_mv""" sql """create database test_analyze_mv""" sql """use test_analyze_mv""" @@ -96,10 +115,13 @@ suite("test_analyze_mv") { sql """analyze table mvTestDup with sync;""" - def result_sample = sql """show column stats mvTestDup""" - assertEquals(12, result_sample.size()) + // Compare show whole table column stats result with show single column. + def result_all = sql """show column stats mvTestDup""" + assertEquals(12, result_all.size()) + def result_all_cached = sql """show column cached stats mvTestDup""" + assertEquals(12, result_all_cached.size()) - result_sample = sql """show column stats mvTestDup(key1)""" + def result_sample = sql """show column stats mvTestDup(key1)""" assertEquals(1, result_sample.size()) assertEquals("key1", result_sample[0][0]) assertEquals("N/A", result_sample[0][1]) @@ -108,6 +130,8 @@ suite("test_analyze_mv") { assertEquals("1", result_sample[0][7]) assertEquals("1001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(value1)""" assertEquals(1, result_sample.size()) @@ -118,6 +142,8 @@ suite("test_analyze_mv") { assertEquals("3", result_sample[0][7]) assertEquals("3001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(mv_key1)""" assertEquals(2, result_sample.size()) @@ -132,6 +158,10 @@ suite("test_analyze_mv") { assertEquals("1", result_sample[0][7]) assertEquals("1001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) + verify_column_stats(result_all, result_sample[1]) + verify_column_stats(result_all_cached, result_sample[1]) result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS BIGINT)`)""" assertEquals(1, result_sample.size()) @@ -142,6 +172,8 @@ suite("test_analyze_mv") { assertEquals("6", result_sample[0][7]) assertEquals("3001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" assertEquals(1, result_sample.size()) @@ -152,6 +184,8 @@ suite("test_analyze_mv") { assertEquals("4", result_sample[0][7]) assertEquals("4001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" assertEquals(1, result_sample.size()) @@ -162,6 +196,8 @@ suite("test_analyze_mv") { assertEquals("5", result_sample[0][7]) assertEquals("5001", result_sample[0][8]) assertEquals("FULL", result_sample[0][9]) + verify_column_stats(result_all, result_sample[0]) + verify_column_stats(result_all_cached, result_sample[0]) sql """CREATE TABLE mvTestAgg ( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org