This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6dcc221e44d [opt](nereids) fix non-null selectivity computing (#42286)
6dcc221e44d is described below

commit 6dcc221e44d1ebedac9e598e04a326f40e39c69c
Author: xzj7019 <131111794+xzj7...@users.noreply.github.com>
AuthorDate: Thu Oct 24 11:47:50 2024 +0800

    [opt](nereids) fix non-null selectivity computing (#42286)
    
    Introduced by PR #40762, which doesn't update numNulls and other
    related column stats when they need to be normalized.
    This PR adds the missing trigger condition for that update and fixes
    the related test cases.
---
 .../org/apache/doris/statistics/Statistics.java    |  3 +-
 .../doris/nereids/stats/FilterEstimationTest.java  | 72 +++++++++++++++++++++-
 .../doris/nereids/stats/StatsCalculatorTest.java   |  4 +-
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 72000f3ce5a..6ad4297dcb1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -119,7 +119,8 @@ public class Statistics {
             // the following columnStatistic.isUnKnown() judgment is loop 
inside since current doris
             // supports partial stats deriving, i.e, allowing part of tables 
have stats and other parts don't,
             // or part of columns have stats but other parts don't, especially 
join and filter estimation.
-            if (!checkColumnStatsValid(columnStatistic, rowCount) && 
!columnStatistic.isUnKnown()) {
+            if (!columnStatistic.isUnKnown() && 
(!checkColumnStatsValid(columnStatistic, rowCount)
+                    || isNumNullsDecreaseByProportion && 
columnStatistic.numNulls != 0)) {
                 ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder(columnStatistic);
                 double ndv = Math.min(columnStatistic.ndv, rowCount);
                 double numNulls = Math.min(columnStatistic.numNulls * factor, 
rowCount - ndv);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 28fe50d16ec..9b0fdc3880d 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -36,9 +36,11 @@ import 
org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
 import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
+import org.apache.doris.nereids.types.DateTimeType;
 import org.apache.doris.nereids.types.DateType;
 import org.apache.doris.nereids.types.DoubleType;
 import org.apache.doris.nereids.types.IntegerType;
@@ -1144,7 +1146,75 @@ class FilterEstimationTest {
         Statistics result = filterEstimation.estimate(and, stats);
         // result 1.0->2.0 bc happens because the calculation from 
normalization of
         // "Math.min(columnStatistic.numNulls * factor, rowCount - ndv);"
-        Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
+        Assertions.assertEquals(result.getRowCount(), 3.5, 0.01);
+    }
+
+    /**
+     * dt BETWEEN "2020-05-25 00:00:00" and "2020-05-25 23:59:59"
+     * and day BETWEEN "2020-05-24" and "2020-05-26"
+     * and game="mus" and plat = "37wan";
+     */
+    @Test
+    void testMultiAndWithNull() {
+        SlotReference dt = new SlotReference("dt", DateTimeType.INSTANCE);
+        ColumnStatisticBuilder dtBuilder = new ColumnStatisticBuilder(1000000)
+                .setNdv(783813.0)
+                .setNumNulls(50833.0)
+                .setMaxValue(new DateTimeLiteral("2020-05-31 
07:59:59").getDouble())
+                .setMinValue(new DateTimeLiteral("2020-05-01 
08:00:04").getDouble());
+        DateLiteral dtMin = new DateTimeLiteral("2020-05-25 00:00:00");
+        DateLiteral dtMax = new DateTimeLiteral("2020-05-25 23:59:59");
+        GreaterThanEqual dtGreater = new GreaterThanEqual(dt, dtMin);
+        LessThan dtLess = new LessThan(dt, dtMax);
+        And dtAnd = new And(dtLess, dtGreater);
+
+        SlotReference day = new SlotReference("day", DateType.INSTANCE);
+        ColumnStatisticBuilder dayBuilder = new ColumnStatisticBuilder(1000000)
+                .setNdv(31.0)
+                .setNumNulls(49699.0)
+                .setMaxValue(new DateLiteral("2020-05-31").getDouble())
+                .setMinValue(new DateLiteral("2020-05-01").getDouble());
+        DateLiteral dayMin = new DateLiteral("2020-05-24");
+        DateLiteral dayMax = new DateLiteral("2020-05-26");
+        GreaterThanEqual dayGreater = new GreaterThanEqual(day, dayMin);
+        LessThan dayLess = new LessThan(day, dayMax);
+        And dayAnd = new And(dayLess, dayGreater);
+
+        SlotReference game = new SlotReference("game", new VarcharType(500));
+        ColumnStatisticBuilder gameBuilder = new 
ColumnStatisticBuilder(1000000)
+                .setNdv(1.0)
+                .setNumNulls(49813.0)
+                .setMaxExpr(new StringLiteral("mus"))
+                .setMaxValue(new VarcharLiteral("mus").getDouble())
+                .setMinExpr(new StringLiteral("mus"))
+                .setMinValue(new VarcharLiteral("mus").getDouble());
+        VarcharLiteral mus = new VarcharLiteral("mus");
+        EqualTo gameEqualTo = new EqualTo(game, mus);
+
+        SlotReference plat = new SlotReference("plat", new VarcharType(500));
+        ColumnStatisticBuilder platBuilder = new 
ColumnStatisticBuilder(1000000)
+                .setNdv(1.0)
+                .setNumNulls(49691.0)
+                .setMaxExpr(new StringLiteral("37wan"))
+                .setMaxValue(new VarcharLiteral("37wan").getDouble())
+                .setMinExpr(new StringLiteral("37wan"))
+                .setMinValue(new VarcharLiteral("37wan").getDouble());
+        VarcharLiteral wan = new VarcharLiteral("37wan");
+        EqualTo wanEqualTo = new EqualTo(plat, wan);
+        And equalAnd = new And(gameEqualTo, wanEqualTo);
+
+        And partialAnd = new And(dtAnd, dayAnd);
+        And allAnd = new And(partialAnd, equalAnd);
+
+        Statistics stats = new Statistics(1000000, new HashMap<>());
+        stats.addColumnStats(dt, dtBuilder.build());
+        stats.addColumnStats(day, dayBuilder.build());
+        stats.addColumnStats(game, gameBuilder.build());
+        stats.addColumnStats(plat, platBuilder.build());
+
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics result = filterEstimation.estimate(allAnd, stats);
+        Assertions.assertEquals(result.getRowCount(), 2109.16, 0.01);
     }
 
     /**
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index 49d295ea7c0..cf91eacb51c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -145,13 +145,13 @@ public class StatsCalculatorTest {
         GroupExpression groupExpression = new GroupExpression(logicalFilter, 
ImmutableList.of(childGroup));
         Group ownerGroup = new Group(null, groupExpression, null);
         StatsCalculator.estimate(groupExpression, null);
-        Assertions.assertEquals(49.45, 
ownerGroup.getStatistics().getRowCount(), 0.001);
+        Assertions.assertEquals(49.945, 
ownerGroup.getStatistics().getRowCount(), 0.001);
 
         LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or, 
groupPlan);
         GroupExpression groupExpressionOr = new 
GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
         Group ownerGroupOr = new Group(null, groupExpressionOr, null);
         StatsCalculator.estimate(groupExpressionOr, null);
-        Assertions.assertEquals(1449.05,
+        Assertions.assertEquals(1448.555,
                 ownerGroupOr.getStatistics().getRowCount(), 0.001);
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to