This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch 2.0.13-tebu
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 706265091c8467f3506ab9d26a267659646a06bd
Author: minghong <engle...@gmail.com>
AuthorDate: Wed Aug 14 09:18:09 2024 +0800

    [feat](nereids) adjust stats derive by delta row (#39185)
    
    #39222
    After a table has been analyzed, the user may insert new rows.
    analyzed rows: the rows that were covered by the analyze job
    delta rows: the rows inserted after the analyze job
    
    If the filter estimate on the analyzed rows is zero and there are delta
    rows, we re-estimate the filter on the delta rows alone, treating every
    column's stats as unknown.
    
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/stats/FilterEstimation.java      | 17 +++++--
 .../doris/nereids/stats/StatsCalculator.java       | 31 ++++++------
 .../org/apache/doris/statistics/Statistics.java    | 24 ++++++++-
 .../apache/doris/statistics/StatisticsBuilder.java | 10 +++-
 .../suites/nereids_p0/delta_row/delta_row.groovy   | 57 ++++++++++++++++++++++
 5 files changed, 117 insertions(+), 22 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 0ce10ec0c3c..915a885a36a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -86,12 +86,21 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
     /**
      * This method will update the stats according to the selectivity.
      */
-    public Statistics estimate(Expression expression, Statistics statistics) {
+    public Statistics estimate(Expression expression, Statistics inputStats) {
         // For a comparison predicate, only when it's left side is a slot and 
right side is a literal, we would
         // consider is a valid predicate.
-        Statistics stats = expression.accept(this, new 
EstimationContext(statistics));
-        stats.enforceValid();
-        return stats;
+        Statistics outputStats = expression.accept(this, new 
EstimationContext(inputStats));
+        if (outputStats.getRowCount() == 0 && inputStats.getDeltaRowCount() > 
0) {
+            StatisticsBuilder deltaStats = new StatisticsBuilder();
+            deltaStats.setDeltaRowCount(0);
+            deltaStats.setRowCount(inputStats.getDeltaRowCount());
+            for (Expression expr : inputStats.columnStatistics().keySet()) {
+                deltaStats.putColumnStatistics(expr, ColumnStatistic.UNKNOWN);
+            }
+            outputStats = expression.accept(this, new 
EstimationContext(deltaStats.build()));
+        }
+        outputStats.enforceValid();
+        return outputStats;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index d711b99655b..2c954f9ee43 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -646,10 +646,10 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 idxId = olapScan.getSelectedIndexId();
             }
         }
-        if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
-            LOG.debug("{} is partially analyzed, clear min/max values in 
column stats",
-                    catalogRelation.getTable().getName());
-        }
+        // if (deltaRowCount > 0 && LOG.isDebugEnabled()) {
+        //     LOG.debug("{} is partially analyzed, clear min/max values in 
column stats",
+        //             catalogRelation.getTable().getName());
+        // }
         for (SlotReference slotReference : slotSet) {
             String colName = slotReference.getColumn().isPresent()
                     ? slotReference.getColumn().get().getName()
@@ -676,14 +676,14 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 hasUnknownCol = true;
             }
             if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableStats) {
-                if (deltaRowCount > 0) {
-                    // clear min-max to avoid error estimation
-                    // for example, after yesterday data loaded, user send 
query about yesterday immediately.
-                    // since yesterday data are not analyzed, the max date is 
before yesterday, and hence optimizer
-                    // estimates the filter result is zero
-                    
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
-                            
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
-                }
+                // if (deltaRowCount > 0) {
+                //     // clear min-max to avoid error estimation
+                //     // for example, after yesterday data loaded, user send 
query about yesterday immediately.
+                //     // since yesterday data are not analyzed, the max date 
is before yesterday, and hence optimizer
+                //     // estimates the filter result is zero
+                //     
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
+                //             
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+                // }
                 columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
             } else {
                 columnStatisticBuilderMap.put(slotReference, new 
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
@@ -693,17 +693,18 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         if (hasUnknownCol && ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null) {
             
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
         }
-        return normalizeCatalogRelationColumnStatsRowCount(rowCount, 
columnStatisticBuilderMap);
+        return normalizeCatalogRelationColumnStatsRowCount(rowCount, 
columnStatisticBuilderMap, deltaRowCount);
     }
 
     private Statistics normalizeCatalogRelationColumnStatsRowCount(double 
rowCount,
-            Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) 
{
+            Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap,
+            double deltaRowCount) {
         Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
         for (Expression slot : columnStatisticBuilderMap.keySet()) {
             columnStatisticMap.put(slot,
                     
columnStatisticBuilderMap.get(slot).setCount(rowCount).build());
         }
-        return new Statistics(rowCount, columnStatisticMap);
+        return new Statistics(rowCount, columnStatisticMap, deltaRowCount);
     }
 
     private Statistics computeTopN(TopN topN) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index da6bf937593..9cec4cc18d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -37,15 +37,23 @@ public class Statistics {
     // the byte size of one tuple
     private double tupleSize;
 
+    private double deltaRowCount = 0.0;
+
     public Statistics(Statistics another) {
         this.rowCount = another.rowCount;
         this.expressionToColumnStats = new 
HashMap<>(another.expressionToColumnStats);
         this.tupleSize = another.tupleSize;
+        this.deltaRowCount = another.getDeltaRowCount();
     }
 
-    public Statistics(double rowCount, Map<Expression, ColumnStatistic> 
expressionToColumnStats) {
+    public Statistics(double rowCount, Map<Expression, ColumnStatistic> 
expressionToColumnStats, double deltaRowCount) {
         this.rowCount = rowCount;
         this.expressionToColumnStats = expressionToColumnStats;
+        this.deltaRowCount = deltaRowCount;
+    }
+
+    public Statistics(double rowCount, Map<Expression, ColumnStatistic> 
expressionToColumnStats) {
+        this(rowCount, expressionToColumnStats, 0);
     }
 
     public ColumnStatistic findColumnStatistics(Expression expression) {
@@ -150,7 +158,11 @@ public class Statistics {
             return "-Infinite";
         }
         DecimalFormat format = new DecimalFormat("#,###.##");
-        return format.format(rowCount);
+        String rows = format.format(rowCount);
+        if (deltaRowCount > 0) {
+            rows = rows + "(" + format.format(deltaRowCount) + ")";
+        }
+        return rows;
     }
 
     public int getBENumber() {
@@ -209,4 +221,12 @@ public class Statistics {
         }
         return builder.build();
     }
+
+    public double getDeltaRowCount() {
+        return deltaRowCount;
+    }
+
+    public void setDeltaRowCount(double deltaRowCount) {
+        this.deltaRowCount = deltaRowCount;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
index a0e75f7df38..1e399573546 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
@@ -28,6 +28,8 @@ public class StatisticsBuilder {
 
     private final Map<Expression, ColumnStatistic> expressionToColumnStats;
 
+    private double deltaRowCount = 0.0;
+
     public StatisticsBuilder() {
         expressionToColumnStats = new HashMap<>();
     }
@@ -36,6 +38,7 @@ public class StatisticsBuilder {
         this.rowCount = statistics.getRowCount();
         expressionToColumnStats = new HashMap<>();
         expressionToColumnStats.putAll(statistics.columnStatistics());
+        deltaRowCount = statistics.getDeltaRowCount();
     }
 
     public StatisticsBuilder setRowCount(double rowCount) {
@@ -54,7 +57,12 @@ public class StatisticsBuilder {
         return this;
     }
 
+    public StatisticsBuilder setDeltaRowCount(double deltaRowCount) {
+        this.deltaRowCount = deltaRowCount;
+        return this;
+    }
+
     public Statistics build() {
-        return new Statistics(rowCount, expressionToColumnStats);
+        return new Statistics(rowCount, expressionToColumnStats, 
deltaRowCount);
     }
 }
diff --git a/regression-test/suites/nereids_p0/delta_row/delta_row.groovy 
b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
new file mode 100644
index 00000000000..1550d3f6b15
--- /dev/null
+++ b/regression-test/suites/nereids_p0/delta_row/delta_row.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("delta_row") {
+    String database = context.config.getDbNameByFile(context.file)
+    sql """
+        drop database if exists ${database};
+        create database ${database};
+        use ${database};
+        CREATE TABLE IF NOT EXISTS t (
+            k int(11) null comment "",
+            v string replace null comment "",
+        ) engine=olap
+        DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1");
+    
+        insert into t values (1, "a"),(2, "b"),(3, 'c'),(4,'d');
+        analyze table t with sync;
+    """
+    explain {
+        sql "physical plan select * from t where k > 6"
+        contains("stats=0 ")
+        contains("stats=4,")
+        // PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
+        //     +--PhysicalDistribute[117]@@1 ( 
distributionSpec=DistributionSpecGather, stats=0 )
+        //         +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0 )
+        //             +--PhysicalOlapScan[111]@0 ( 
qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t, 
stats=4, fr=Optional[1] )
+    }
+
+    sql "set global enable_auto_analyze=false;"
+
+    sql "insert into t values (10, 'c');"
+    explain {
+        sql "physical plan select * from t where k > 6"
+        contains("stats=0.5 ")
+        contains("stats=5(1),")
+        notContains("stats=0 ")
+        notContains("stats=4,")
+        // PhysicalResultSink[120] ( outputExprs=[k#0, v#1] )
+        //     +--PhysicalDistribute[117]@@1 ( 
distributionSpec=DistributionSpecGather, stats=0.5 )
+        //         +--PhysicalFilter[114]@1 ( predicates=(k#0 > 6), stats=0.5 )
+        //             +--PhysicalOlapScan[111]@0 ( 
qualified=internal.default_cluster:regression_test_nereids_p0_delta_row.t, 
stats=5(1), fr=Optional[1] )
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to