[doris] branch master updated: [refactor](Nereids): avoid useless groupByColStats Map (#22000)

jakevin Sun, 23 Jul 2023 21:14:02 -0700

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 66fa1bef6d [refactor](Nereids): avoid useless groupByColStats Map 
(#22000)
66fa1bef6d is described below

commit 66fa1bef6dec5a8b62416d1fa9d4bcecf2f98ebc
Author: jakevin <jakevin...@gmail.com>
AuthorDate: Mon Jul 24 12:13:52 2023 +0800

    [refactor](Nereids): avoid useless groupByColStats Map (#22000)
---
 .../doris/nereids/stats/StatsCalculator.java       | 50 +++++++++++-----------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index a7cf8648fe..2dda0cfb03 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -130,8 +130,8 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.util.AbstractMap.SimpleEntry;
+import java.util.ArrayList;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -689,38 +689,36 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
 
     private double estimateGroupByRowCount(List<Expression> 
groupByExpressions, Statistics childStats) {
         double rowCount = 1;
-        Map<Expression, ColumnStatistic> groupByColStats = new HashMap<>();
+        // if there is group-bys, output row count is childStats.getRowCount() 
* DEFAULT_AGGREGATE_RATIO,
+        // o.w. output row count is 1
+        // example: select sum(A) from T;
+        if (groupByExpressions.isEmpty()) {
+            return 1;
+        }
+        List<Double> groupByNdvs = new ArrayList<>();
         for (Expression groupByExpr : groupByExpressions) {
             ColumnStatistic colStats = 
childStats.findColumnStatistics(groupByExpr);
             if (colStats == null) {
                 colStats = ExpressionEstimation.estimate(groupByExpr, 
childStats);
             }
-            groupByColStats.put(groupByExpr, colStats);
-        }
-        int groupByCount = groupByExpressions.size();
-        if 
(groupByColStats.values().stream().anyMatch(ColumnStatistic::isUnKnown)) {
-            // if there is group-bys, output row count is 
childStats.getRowCount() * DEFAULT_AGGREGATE_RATIO,
-            // o.w. output row count is 1
-            // example: select sum(A) from T;
-            if (groupByCount > 0) {
+            if (colStats.isUnKnown()) {
                 rowCount = childStats.getRowCount() * DEFAULT_AGGREGATE_RATIO;
-            } else {
-                rowCount = 1;
+                rowCount = Math.max(1, rowCount);
+                rowCount = Math.min(rowCount, childStats.getRowCount());
+                return rowCount;
             }
-        } else {
-            if (groupByCount > 0) {
-                List<Double> groupByNdvs = groupByColStats.values().stream()
-                        .map(colStats -> colStats.ndv)
-                        
.sorted(Comparator.reverseOrder()).collect(Collectors.toList());
-                rowCount = groupByNdvs.get(0);
-                for (int groupByIndex = 1; groupByIndex < groupByCount; 
++groupByIndex) {
-                    rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * 
Math.pow(
-                            AGGREGATE_COLUMN_CORRELATION_COEFFICIENT, 
groupByIndex + 1D));
-                    if (rowCount > childStats.getRowCount()) {
-                        rowCount = childStats.getRowCount();
-                        break;
-                    }
-                }
+            double ndv = colStats.ndv;
+            groupByNdvs.add(ndv);
+        }
+        groupByNdvs.sort(Collections.reverseOrder());
+
+        rowCount = groupByNdvs.get(0);
+        for (int groupByIndex = 1; groupByIndex < groupByExpressions.size(); 
++groupByIndex) {
+            rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * Math.pow(
+                    AGGREGATE_COLUMN_CORRELATION_COEFFICIENT, groupByIndex + 
1D));
+            if (rowCount > childStats.getRowCount()) {
+                rowCount = childStats.getRowCount();
+                break;
             }
         }
         rowCount = Math.max(1, rowCount);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch master updated: [refactor](Nereids): avoid useless groupByColStats Map (#22000)

Reply via email to