This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 1245df670fb [feat](nereids) adjust stats derive by delta row #39222 (2.1) (#42025) 1245df670fb is described below commit 1245df670fbe717958037dd5b40138b6d131c62a Author: minghong <engle...@gmail.com> AuthorDate: Thu Oct 17 19:54:52 2024 +0800 [feat](nereids) adjust stats derive by delta row #39222 (2.1) (#42025) ## Proposed changes pick #39222 wait JiBin merge updateRows Issue Number: close #xxx <!--Describe your changes.--> --- .../doris/nereids/stats/FilterEstimation.java | 19 +++++--- .../doris/nereids/stats/StatsCalculator.java | 15 ++---- .../org/apache/doris/statistics/Statistics.java | 37 ++++++++++++--- .../apache/doris/statistics/StatisticsBuilder.java | 10 +++- .../suites/nereids_p0/delta_row/delta_row.groovy | 55 ++++++++++++++++++++++ 5 files changed, 112 insertions(+), 24 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 65db271e394..df0edf8b159 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -99,12 +99,19 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo /** * This method will update the stats according to the selectivity. */ - public Statistics estimate(Expression expression, Statistics statistics) { - // For a comparison predicate, only when it's left side is a slot and right side is a literal, we would - // consider is a valid predicate. - Statistics stats = expression.accept(this, new EstimationContext(statistics)); - stats.enforceValid(); - return stats; + public Statistics estimate(Expression expression, Statistics inputStats) { + Statistics outputStats = expression.accept(this, new EstimationContext(inputStats)); + if (outputStats.getRowCount() == 0 && inputStats.getDeltaRowCount() > 0) { + StatisticsBuilder deltaStats = new StatisticsBuilder(); + deltaStats.setDeltaRowCount(0); + deltaStats.setRowCount(inputStats.getDeltaRowCount()); + for (Expression expr : inputStats.columnStatistics().keySet()) { + deltaStats.putColumnStatistics(expr, ColumnStatistic.UNKNOWN); + } + outputStats = expression.accept(this, new EstimationContext(deltaStats.build())); + } + outputStats.enforceValid(); + return outputStats; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index cb7177569e6..1a983532a94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -834,14 +834,6 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { hasUnknownCol = true; } if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { - if (deltaRowCount > 0) { - // clear min-max to avoid error estimation - // for example, after yesterday data loaded, user send query about yesterday immediately. - // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer - // estimates the filter result is zero - colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); - } columnStatisticBuilderMap.put(slotReference, colStatsBuilder); } else { columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN)); @@ -851,17 +843,18 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { ConnectContext.get().getStatementContext().setHasUnknownColStats(true); } - return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap); + return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap, deltaRowCount); } private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount, - Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) { + Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap, + long deltaRowCount) { Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); for (Expression slot : columnStatisticBuilderMap.keySet()) { columnStatisticMap.put(slot, columnStatisticBuilderMap.get(slot).setCount(rowCount).build()); } - return new Statistics(rowCount, columnStatisticMap); + return new Statistics(rowCount, 1, columnStatisticMap, deltaRowCount); } private Statistics computeTopN(TopN topN) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index b6bbebdd371..a907f6412f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -38,15 +38,32 @@ public class Statistics { // the byte size of one tuple private double tupleSize; + private double deltaRowCount = 0.0; + + public Statistics(Statistics another) { + this.rowCount = another.rowCount; + this.widthInJoinCluster = another.widthInJoinCluster; + this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats); + this.tupleSize = another.tupleSize; + this.deltaRowCount = another.getDeltaRowCount(); + } + public Statistics(double rowCount, Map<Expression, ColumnStatistic> expressionToColumnStats) { - this(rowCount, 1, expressionToColumnStats); + this(rowCount, 1, expressionToColumnStats, 0); } public Statistics(double rowCount, int widthInJoinCluster, - Map<Expression, ColumnStatistic> expressionToColumnStats) { + Map<Expression, ColumnStatistic> expressionToColumnStats) { + this(rowCount, widthInJoinCluster, expressionToColumnStats, 0); + } + + public Statistics(double rowCount, int widthInJoinCluster, + Map<Expression, ColumnStatistic> expressionToColumnStats, + double deltaRowCount) { this.rowCount = rowCount; this.widthInJoinCluster = widthInJoinCluster; this.expressionToColumnStats = expressionToColumnStats; + this.deltaRowCount = deltaRowCount; } public ColumnStatistic findColumnStatistics(Expression expression) { @@ -62,18 +79,18 @@ public class Statistics { } public Statistics withRowCount(double rowCount) { - return new Statistics(rowCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats)); + return new Statistics(rowCount, widthInJoinCluster, new HashMap<>(expressionToColumnStats), deltaRowCount); } public Statistics withExpressionToColumnStats(Map<Expression, ColumnStatistic> expressionToColumnStats) { - return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats); + return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount); } /** * Update by count. */ public Statistics withRowCountAndEnforceValid(double rowCount) { - Statistics statistics = new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats); + Statistics statistics = new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount); statistics.enforceValid(); return statistics; } @@ -155,7 +172,11 @@ public class Statistics { return "-Infinite"; } DecimalFormat format = new DecimalFormat("#,###.##"); - return format.format(rowCount); + String rows = format.format(rowCount); + if (deltaRowCount > 0) { + rows = rows + "(" + format.format(deltaRowCount) + ")"; + } + return rows; } public int getBENumber() { @@ -222,4 +243,8 @@ public class Statistics { } return builder.build(); } + + public double getDeltaRowCount() { + return deltaRowCount; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java index 53d8f49cb14..d0dc49db470 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java @@ -28,6 +28,8 @@ public class StatisticsBuilder { private int widthInJoinCluster; private final Map<Expression, ColumnStatistic> expressionToColumnStats; + private double deltaRowCount = 0.0; + public StatisticsBuilder() { expressionToColumnStats = new HashMap<>(); } @@ -37,6 +39,7 @@ public class StatisticsBuilder { this.widthInJoinCluster = statistics.getWidthInJoinCluster(); expressionToColumnStats = new HashMap<>(); expressionToColumnStats.putAll(statistics.columnStatistics()); + this.deltaRowCount = statistics.getDeltaRowCount(); } public StatisticsBuilder setRowCount(double rowCount) { @@ -49,6 +52,11 @@ public class StatisticsBuilder { return this; } + public StatisticsBuilder setDeltaRowCount(double deltaRowCount) { + this.deltaRowCount = deltaRowCount; + return this; + } + public StatisticsBuilder putColumnStatistics( Map<Expression, ColumnStatistic> expressionToColumnStats) { this.expressionToColumnStats.putAll(expressionToColumnStats); @@ -61,6 +69,6 @@ public class StatisticsBuilder { } public Statistics build() { - return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats); + return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats, deltaRowCount); } } diff --git a/regression-test/suites/nereids_p0/delta_row/delta_row.groovy b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy new file mode 100644 index 00000000000..c6f40f5363f --- /dev/null +++ b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("delta_row") { + String database = context.config.getDbNameByFile(context.file) + sql """ + drop database if exists ${database}; + create database ${database}; + use ${database}; + CREATE TABLE IF NOT EXISTS t ( + k int(11) null comment "", + v string replace null comment "", + ) engine=olap + DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1"); + + insert into t values (1, "a"),(2, "b"),(3, 'c'),(4,'d'); + analyze table t with sync; + """ + explain { + sql "physical plan select * from t where k > 6" + contains("stats=0,") + contains("stats=4 ") + // PhysicalResultSink[75] ( outputExprs=[k#0, v#1] ) + // +--PhysicalFilter[72]@1 ( stats=0, predicates=(k#0 > 6) ) + // +--PhysicalOlapScan[t]@0 ( stats=4 ) + } + + sql "set global enable_auto_analyze=false;" + + sql "insert into t values (10, 'c');" + explain { + sql "physical plan select * from t where k > 6" + contains("stats=0.5,") + contains("stats=5(1)") + notContains("stats=0,") + notContains("stats=4 ") +// PhysicalResultSink[75] ( outputExprs=[k#0, v#1] ) +// +--PhysicalFilter[72]@1 ( stats=0.5, predicates=(k#0 > 6) ) +// +--PhysicalOlapScan[t]@0 ( stats=5(1) ) + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org