This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d2efc619b0 [Enchancement](statistics) Show histogram statistics, show specified column statistics (#18657) d2efc619b0 is described below commit d2efc619b0aa4c18c290044bc53a5ef83d37f6b1 Author: ElvinWei <zhengte....@outlook.com> AuthorDate: Fri Apr 14 22:36:40 2023 +0800 [Enchancement](statistics) Show histogram statistics, show specified column statistics (#18657) --- fe/fe-core/src/main/cup/sql_parser.cup | 9 ++- ...olumnStatsStmt.java => ShowColumnHistStmt.java} | 83 +++++++++++++--------- .../apache/doris/analysis/ShowColumnStatsStmt.java | 41 +++++++++-- .../java/org/apache/doris/qe/ShowExecutor.java | 30 +++++--- .../java/org/apache/doris/statistics/Bucket.java | 51 +++++-------- .../org/apache/doris/statistics/Histogram.java | 26 +++++-- .../apache/doris/statistics/HistogramBuilder.java | 2 +- .../doris/statistics/StatisticsRepository.java | 26 ++++++- .../org/apache/doris/statistics/HistogramTest.java | 28 ++++---- 9 files changed, 190 insertions(+), 106 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 9fdeaab511..a860b74d4f 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -3970,9 +3970,14 @@ show_param ::= RESULT = new ShowSyncJobStmt(dbName); :} /* show column stats */ - | KW_COLUMN KW_STATS table_name:tbl opt_partition_names:partitionNames + | KW_COLUMN KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames {: - RESULT = new ShowColumnStatsStmt(tbl, partitionNames); + RESULT = new ShowColumnStatsStmt(tbl, cols, partitionNames); + :} + /* show column histogram */ + | KW_COLUMN KW_HISTOGRAM table_name:tbl opt_col_list:cols + {: + RESULT = new ShowColumnHistStmt(tbl, cols); :} /* show table creation statement */ | KW_TABLE KW_CREATION opt_db:db opt_wild_where diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java similarity index 61% copy from fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java copy to fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java index 557a5a8590..20b5dbbd0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnHistStmt.java @@ -22,46 +22,49 @@ import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; -import org.apache.doris.common.AnalysisException; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; -import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.Histogram; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; -public class ShowColumnStatsStmt extends ShowStmt { +public class ShowColumnHistStmt extends ShowStmt { private static final ImmutableList<String> TITLE_NAMES = new ImmutableList.Builder<String>() .add("column_name") - .add("count") - .add("ndv") - .add("num_null") - .add("data_size") - .add("avg_size_byte") - .add("min") - .add("max") - .add("min_expr") - .add("max_expr") + .add("data_type") + .add("sample_rate") + .add("num_buckets") + .add("buckets") .build(); private final TableName tableName; - private final PartitionNames partitionNames; + private final List<String> columnNames; private TableIf table; - public ShowColumnStatsStmt(TableName tableName, PartitionNames partitionNames) { + public ShowColumnHistStmt(TableName tableName, List<String> columnNames) { this.tableName = tableName; - this.partitionNames = partitionNames; + this.columnNames = columnNames; } public TableName getTableName() { @@ -72,12 +75,7 @@ public class ShowColumnStatsStmt extends ShowStmt { public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); tableName.analyze(analyzer); - if (partitionNames != null) { - partitionNames.analyze(analyzer); - if (partitionNames.getPartitionNames().size() > 1) { - throw new AnalysisException("Only one partition name could be specified"); - } - } + // disallow external catalog Util.prohibitExternalCatalog(tableName.getCtl(), this.getClass().getSimpleName()); CatalogIf<DatabaseIf> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl()); @@ -92,6 +90,23 @@ public class ShowColumnStatsStmt extends ShowStmt { if (table == null) { ErrorReport.reportAnalysisException("Table: {} not exists", tableName.getTbl()); } + + if (!Env.getCurrentEnv().getAccessManager() + .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied", + ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), + tableName.getDb() + ": " + tableName.getTbl()); + } + + if (columnNames != null) { + Optional<Column> nullColumn = columnNames.stream() + .map(table::getColumn) + .filter(Objects::isNull) + .findFirst(); + if (nullColumn.isPresent()) { + ErrorReport.reportAnalysisException("Column: {} not exists", nullColumn.get()); + } + } } @Override @@ -108,29 +123,29 @@ public class ShowColumnStatsStmt extends ShowStmt { return table; } - public ShowResultSet constructResultSet(List<Pair<String, ColumnStatistic>> columnStatistics) { + public ShowResultSet constructResultSet(List<Pair<String, Histogram>> columnStatistics) { List<List<String>> result = Lists.newArrayList(); columnStatistics.forEach(p -> { - if (p.second == ColumnStatistic.UNKNOWN) { + if (p.second == null || p.second.dataType == Type.NULL) { return; } List<String> row = Lists.newArrayList(); row.add(p.first); - row.add(String.valueOf(p.second.count)); - row.add(String.valueOf(p.second.ndv)); - row.add(String.valueOf(p.second.numNulls)); - row.add(String.valueOf(p.second.dataSize)); - row.add(String.valueOf(p.second.avgSizeByte)); - row.add(String.valueOf(p.second.minValue)); - row.add(String.valueOf(p.second.maxValue)); - row.add(String.valueOf(p.second.minExpr == null ? "N/A" : p.second.minExpr.toSql())); - row.add(String.valueOf(p.second.maxExpr == null ? "N/A" : p.second.maxExpr.toSql())); + row.add(String.valueOf(p.second.dataType)); + row.add(String.valueOf(p.second.sampleRate)); + row.add(String.valueOf(p.second.numBuckets)); + row.add(Histogram.getBucketsJson(p.second.buckets).toString()); result.add(row); }); + return new ShowResultSet(getMetaData(), result); } - public PartitionNames getPartitionNames() { - return partitionNames; + public Set<String> getColumnNames() { + if (columnNames != null) { + return Sets.newHashSet(columnNames); + } + return table.getColumns().stream() + .map(Column::getName).collect(Collectors.toSet()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index 557a5a8590..fd67316df4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -23,19 +23,27 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; import org.apache.doris.statistics.ColumnStatistic; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; public class ShowColumnStatsStmt extends ShowStmt { @@ -49,18 +57,18 @@ public class ShowColumnStatsStmt extends ShowStmt { .add("avg_size_byte") .add("min") .add("max") - .add("min_expr") - .add("max_expr") .build(); private final TableName tableName; + private final List<String> columnNames; private final PartitionNames partitionNames; private TableIf table; - public ShowColumnStatsStmt(TableName tableName, PartitionNames partitionNames) { + public ShowColumnStatsStmt(TableName tableName, List<String> columnNames, PartitionNames partitionNames) { this.tableName = tableName; + this.columnNames = columnNames; this.partitionNames = partitionNames; } @@ -92,6 +100,23 @@ public class ShowColumnStatsStmt extends ShowStmt { if (table == null) { ErrorReport.reportAnalysisException("Table: {} not exists", tableName.getTbl()); } + + if (!Env.getCurrentEnv().getAccessManager() + .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied", + ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), + tableName.getDb() + ": " + tableName.getTbl()); + } + + if (columnNames != null) { + Optional<Column> nullColumn = columnNames.stream() + .map(table::getColumn) + .filter(Objects::isNull) + .findFirst(); + if (nullColumn.isPresent()) { + ErrorReport.reportAnalysisException("Column: {} not exists", nullColumn.get()); + } + } } @Override @@ -121,8 +146,6 @@ public class ShowColumnStatsStmt extends ShowStmt { row.add(String.valueOf(p.second.numNulls)); row.add(String.valueOf(p.second.dataSize)); row.add(String.valueOf(p.second.avgSizeByte)); - row.add(String.valueOf(p.second.minValue)); - row.add(String.valueOf(p.second.maxValue)); row.add(String.valueOf(p.second.minExpr == null ? "N/A" : p.second.minExpr.toSql())); row.add(String.valueOf(p.second.maxExpr == null ? "N/A" : p.second.maxExpr.toSql())); result.add(row); @@ -133,4 +156,12 @@ public class ShowColumnStatsStmt extends ShowStmt { public PartitionNames getPartitionNames() { return partitionNames; } + + public Set<String> getColumnNames() { + if (columnNames != null) { + return Sets.newHashSet(columnNames); + } + return table.getColumns().stream() + .map(Column::getName).collect(Collectors.toSet()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 0c240e525f..4c9757616e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -36,6 +36,7 @@ import org.apache.doris.analysis.ShowCatalogRecycleBinStmt; import org.apache.doris.analysis.ShowCatalogStmt; import org.apache.doris.analysis.ShowClusterStmt; import org.apache.doris.analysis.ShowCollationStmt; +import org.apache.doris.analysis.ShowColumnHistStmt; import org.apache.doris.analysis.ShowColumnStatsStmt; import org.apache.doris.analysis.ShowColumnStmt; import org.apache.doris.analysis.ShowCreateCatalogStmt; @@ -185,6 +186,7 @@ import org.apache.doris.mtmv.metadata.MTMVJob; import org.apache.doris.mtmv.metadata.MTMVTask; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.StatisticsRepository; import org.apache.doris.system.Backend; import org.apache.doris.system.Diagnoser; @@ -378,6 +380,8 @@ public class ShowExecutor { handleShowSqlBlockRule(); } else if (stmt instanceof ShowColumnStatsStmt) { handleShowColumnStats(); + } else if (stmt instanceof ShowColumnHistStmt) { + handleShowColumnHist(); } else if (stmt instanceof ShowTableCreationStmt) { handleShowTableCreation(); } else if (stmt instanceof ShowLastInsertStmt) { @@ -2301,20 +2305,15 @@ public class ShowExecutor { ShowColumnStatsStmt showColumnStatsStmt = (ShowColumnStatsStmt) stmt; TableName tableName = showColumnStatsStmt.getTableName(); TableIf tableIf = showColumnStatsStmt.getTable(); - if (!Env.getCurrentEnv().getAccessManager() - .checkTblPriv(ConnectContext.get(), tableName.getDb(), tableName.getTbl(), PrivPredicate.SHOW)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "Permission denied", - ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), - tableName.getDb() + ": " + tableName.getTbl()); - } List<Pair<String, ColumnStatistic>> columnStatistics = new ArrayList<>(); + Set<String> columnNames = showColumnStatsStmt.getColumnNames(); PartitionNames partitionNames = showColumnStatsStmt.getPartitionNames(); - for (Column column : tableIf.getColumns()) { - String colName = column.getName(); + + for (String colName : columnNames) { if (partitionNames == null) { ColumnStatistic columnStatistic = StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), colName); - columnStatistics.add(Pair.of(column.getName(), columnStatistic)); + columnStatistics.add(Pair.of(colName, columnStatistic)); } else { columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName, colName, showColumnStatsStmt.getPartitionNames().getPartitionNames()) @@ -2326,6 +2325,19 @@ public class ShowExecutor { resultSet = showColumnStatsStmt.constructResultSet(columnStatistics); } + public void handleShowColumnHist() { + ShowColumnHistStmt showColumnHistStmt = (ShowColumnHistStmt) stmt; + TableIf tableIf = showColumnHistStmt.getTable(); + Set<String> columnNames = showColumnHistStmt.getColumnNames(); + + List<Pair<String, Histogram>> columnStatistics = columnNames.stream() + .map(colName -> Pair.of(colName, + StatisticsRepository.queryColumnHistogramByName(tableIf.getId(), colName))) + .collect(Collectors.toList()); + + resultSet = showColumnHistStmt.constructResultSet(columnStatistics); + } + public void handleShowSqlBlockRule() throws AnalysisException { ShowSqlBlockRuleStmt showStmt = (ShowSqlBlockRuleStmt) stmt; List<List<String>> rows = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java index 8a63b4b31d..3137ffbde2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.statistics.util.StatisticsUtil; @@ -31,6 +32,10 @@ public class Bucket { public double preSum; public double ndv; + // For display only. + public LiteralExpr lowerExpr; + public LiteralExpr upperExpr; + public Bucket() { } @@ -42,44 +47,15 @@ public class Bucket { this.ndv = ndv; } - public double getLower() { - return lower; - } - - public void setLower(double lower) { + public Bucket(double lower, double upper, double count, double preSum, double ndv, + LiteralExpr lowerExpr, LiteralExpr upperExpr) { this.lower = lower; - } - - public double getUpper() { - return upper; - } - - public void setUpper(double upper) { this.upper = upper; - } - - public double getCount() { - return count; - } - - public void setCount(int count) { this.count = count; - } - - public double getPreSum() { - return preSum; - } - - public void setPreSum(int preSum) { this.preSum = preSum; - } - - public double getNdv() { - return ndv; - } - - public void setNdv(int ndv) { this.ndv = ndv; + this.lowerExpr = lowerExpr; + this.upperExpr = upperExpr; } public static Bucket deserializeFromJson(Type datatype, String json) throws AnalysisException { @@ -90,6 +66,11 @@ public class Bucket { bucket.count = bucketJson.get("count").getAsInt(); bucket.preSum = bucketJson.get("pre_sum").getAsInt(); bucket.ndv = bucketJson.get("ndv").getAsInt(); + + // LowerExpr and upperExpr for display only. + bucket.lowerExpr = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString()); + bucket.upperExpr = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").getAsString()); + return bucket; } @@ -99,8 +80,8 @@ public class Bucket { } JsonObject bucketJson = new JsonObject(); - bucketJson.addProperty("upper", bucket.upper); - bucketJson.addProperty("lower", bucket.lower); + bucketJson.addProperty("lower_expr", bucket.lowerExpr.getStringValue()); + bucketJson.addProperty("upper_expr", bucket.upperExpr.getStringValue()); bucketJson.addProperty("count", bucket.count); bucketJson.addProperty("pre_sum", bucket.preSum); bucketJson.addProperty("ndv", bucket.ndv); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java index d9324975d4..0592a241e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java @@ -33,6 +33,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.parquet.Strings; +import java.util.Collections; import java.util.List; public class Histogram { @@ -44,13 +45,18 @@ public class Histogram { public final List<Bucket> buckets; - public Histogram(Type dataType, double sampleRate, List<Bucket> buckets) { + public final int numBuckets; + + public Histogram(Type dataType, double sampleRate, int numBuckets, List<Bucket> buckets) { this.dataType = dataType; this.sampleRate = sampleRate; + this.numBuckets = numBuckets; this.buckets = buckets; } - + public static Histogram UNKNOWN = new HistogramBuilder().setDataType(Type.NULL) + .setSampleRate(0).setNumBuckets(0).setBuckets(Collections.emptyList()) + .build(); // TODO: use thrift public static Histogram fromResultRow(ResultRow resultRow) { @@ -151,18 +157,26 @@ public class Histogram { histogramJson.addProperty("sample_rate", histogram.sampleRate); histogramJson.addProperty("num_buckets", histogram.buckets.size()); - JsonArray bucketsJsonArray = new JsonArray(); - histogram.buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add); - histogramJson.add("buckets", bucketsJsonArray); + JsonArray bucketsJson = getBucketsJson(histogram.buckets); + histogramJson.add("buckets", bucketsJson); return histogramJson.toString(); } + public static JsonArray getBucketsJson(List<Bucket> buckets) { + if (buckets == null) { + return null; + } + JsonArray bucketsJsonArray = new JsonArray(); + buckets.stream().map(Bucket::serializeToJsonObj).forEach(bucketsJsonArray::add); + return bucketsJsonArray; + } + public double size() { if (CollectionUtils.isEmpty(buckets)) { return 0; } Bucket lastBucket = buckets.get(buckets.size() - 1); - return lastBucket.getPreSum() + lastBucket.getCount(); + return lastBucket.preSum + lastBucket.count; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java index 41ce66b94d..3ffc79bfb1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java @@ -64,6 +64,6 @@ public class HistogramBuilder { } public Histogram build() { - return new Histogram(dataType, sampleRate, buckets); + return new Histogram(dataType, sampleRate, numBuckets, buckets); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 865dd3ca2c..4ce673a063 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -56,6 +56,9 @@ public class StatisticsRepository { private static final String FULL_QUALIFIED_COLUMN_STATISTICS_NAME = FULL_QUALIFIED_DB_NAME + "." + "`" + StatisticConstants.STATISTIC_TBL_NAME + "`"; + private static final String FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME = FULL_QUALIFIED_DB_NAME + "." + + "`" + StatisticConstants.HISTOGRAM_TBL_NAME + "`"; + private static final String FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME = FULL_QUALIFIED_DB_NAME + "." + "`" + StatisticConstants.ANALYSIS_JOB_TABLE + "`"; @@ -67,6 +70,10 @@ public class StatisticsRepository { + FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " WHERE `id` IN (${idList})"; + private static final String FETCH_COLUMN_HISTOGRAM_TEMPLATE = "SELECT * FROM " + + FULL_QUALIFIED_COLUMN_HISTOGRAM_NAME + + " WHERE `id` = '${id}'"; + private static final String PERSIST_ANALYSIS_TASK_SQL_TEMPLATE = "INSERT INTO " + FULL_QUALIFIED_ANALYSIS_JOB_TABLE_NAME + " VALUES(${jobId}, ${taskId}, '${catalogName}', '${dbName}'," + "'${tblName}','${colName}', '${indexId}','${jobType}', '${analysisType}', " @@ -121,10 +128,19 @@ public class StatisticsRepository { } public static ResultRow queryColumnStatisticById(long tblId, String colName) { + return queryColumnStatisticById(tblId, colName, false); + } + + public static ResultRow queryColumnHistogramById(long tblId, String colName) { + return queryColumnStatisticById(tblId, colName, true); + } + + private static ResultRow queryColumnStatisticById(long tblId, String colName, boolean isHistogram) { Map<String, String> map = new HashMap<>(); String id = constructId(tblId, -1, colName); map.put("id", id); - List<ResultRow> rows = StatisticsUtil.executeQuery(FETCH_COLUMN_STATISTIC_TEMPLATE, map); + List<ResultRow> rows = isHistogram ? StatisticsUtil.executeQuery(FETCH_COLUMN_HISTOGRAM_TEMPLATE, map) : + StatisticsUtil.executeQuery(FETCH_COLUMN_STATISTIC_TEMPLATE, map); int size = rows.size(); if (size > 1) { throw new IllegalStateException(String.format("id: %s should be unique, but return more than one row", id)); @@ -143,6 +159,14 @@ public class StatisticsRepository { return rows == null ? Collections.emptyList() : rows; } + public static Histogram queryColumnHistogramByName(long tableId, String colName) { + ResultRow resultRow = queryColumnHistogramById(tableId, colName); + if (resultRow == null) { + return Histogram.UNKNOWN; + } + return Histogram.fromResultRow(resultRow); + } + private static String constructId(Object... params) { StringJoiner stringJoiner = new StringJoiner("-"); for (Object param : params) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java index 515e3c0d3f..b5ca8d8095 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java @@ -21,11 +21,11 @@ import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.statistics.util.StatisticsUtil; import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import org.apache.commons.math3.util.Precision; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -69,18 +69,17 @@ class HistogramTest { List<Bucket> buckets = histogramUnderTest.buckets; Assertions.assertEquals(5, buckets.size()); - double expectedLower = LiteralExpr.create("2022-09-21 17:30:29", - Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue(); - double expectedUpper = LiteralExpr.create("2022-09-21 22:30:29", - Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))).getDoubleValue(); + LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29", + Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))); + LiteralExpr expectedUpper = LiteralExpr.create("2022-09-21 22:30:29", + Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.DATETIME))); boolean flag = false; for (Bucket bucket : buckets) { - double lower = bucket.getLower(); - double upper = bucket.getUpper(); - if (Precision.equals(expectedLower, lower, 0.01) - && Precision.equals(expectedUpper, upper, 0.01)) { + LiteralExpr lower = bucket.lowerExpr; + LiteralExpr upper = bucket.upperExpr; + if (expectedLower.equals(lower) && expectedUpper.equals(upper)) { flag = true; break; } @@ -96,6 +95,8 @@ class HistogramTest { String typeStr = histogramJson.get("data_type").getAsString(); Assertions.assertEquals("DATETIME", typeStr); + Type datatype = Type.fromPrimitiveType(PrimitiveType.valueOf(typeStr)); + Assertions.assertNotNull(datatype); int numBuckets = histogramJson.get("num_buckets").getAsInt(); Assertions.assertEquals(5, numBuckets); @@ -116,13 +117,14 @@ class HistogramTest { for (int i = 0; i < jsonArray.size(); i++) { JsonObject bucketJson = jsonArray.get(i).getAsJsonObject(); - double lower = bucketJson.get("lower").getAsDouble(); - double upper = bucketJson.get("upper").getAsDouble(); + LiteralExpr lower = StatisticsUtil.readableValue(datatype, + bucketJson.get("lower_expr").getAsString()); + LiteralExpr upper = StatisticsUtil.readableValue(datatype, + bucketJson.get("upper_expr").getAsString()); int count = bucketJson.get("count").getAsInt(); int preSum = bucketJson.get("pre_sum").getAsInt(); int ndv = bucketJson.get("ndv").getAsInt(); - if (Precision.equals(expectedLower.getDoubleValue(), lower, 0.01) - && Precision.equals(expectedUpper.getDoubleValue(), upper, 0.01) + if (expectedLower.equals(lower) && expectedUpper.equals(upper) && count == 9 && preSum == 0 && ndv == 1) { flag = true; break; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org