This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new b136c44584f [feat](nereids)disable join reorder if column stats is invalid #41790 (branch-2.0) (#42913)
b136c44584f is described below

commit b136c44584f2f6b2f167b3e94bbcbe660ec76230
Author: minghong <engle...@gmail.com>
AuthorDate: Fri Nov 1 11:47:49 2024 +0800

    [feat](nereids)disable join reorder if column stats is invalid #41790 (branch-2.0) (#42913)
    
    ## Proposed changes
    pick  #41790
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../org/apache/doris/nereids/NereidsPlanner.java   |  3 +-
 .../doris/nereids/stats/StatsCalculator.java       | 65 +++++++++++++++++++---
 .../nereids_tpcds_shape_sf100_p0/load.groovy       |  6 +-
 3 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 150780d95e1..24550f4f724 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -268,7 +268,8 @@ public class NereidsPlanner extends Planner {
         if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest && cascadesContext.isLeadingJoin()) {
             List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
                     .collectToList(LogicalOlapScan.class::isInstance);
-            StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext);
+            Optional<String> reason = StatsCalculator.disableJoinReorderIfStatsInvalid(scans, cascadesContext);
+            reason.ifPresent(LOG::info);
         }
 
         optimize();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 9458407295d..1fc3708545e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.stats;
 
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
@@ -141,6 +142,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -1159,25 +1161,74 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
         return rowCount;
     }
 
+    private boolean isVisibleSlotReference(Slot slot) {
+        if (slot instanceof SlotReference) {
+            Optional<Column> colOpt = ((SlotReference) slot).getColumn();
+            if (colOpt.isPresent()) {
+                return colOpt.get().isVisible();
+            }
+        }
+        return false;
+    }
+
+    private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) {
+        long idxId = -1;
+        if (catalogRelation instanceof OlapScan) {
+            idxId = ((OlapScan) catalogRelation).getSelectedIndexId();
+        }
+        return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId);
+    }
+
+    // check validation of ndv.
+    private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) {
+        for (Slot slot : ((Plan) olapScan).getOutput()) {
+            if (isVisibleSlotReference(slot)) {
+                ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
+                if (!cache.isUnKnown) {
+                    if ((cache.ndv == 0 && (cache.minExpr != null || cache.maxExpr != null))
+                            || cache.ndv > rowCount * 10) {
+                        return Optional.of("slot " + slot.getName() + " has invalid column stats: " + cache);
+                    }
+                }
+            }
+        }
+        return Optional.empty();
+    }
+
     /**
-     * disable join reorder if any table row count is not available.
+     * disable join reorder if
+     * 1. any table rowCount is not available, or
+     * 2. col stats ndv=0 but minExpr or maxExpr is not null
+     * 3. ndv > 10 * rowCount
      */
-    public static void disableJoinReorderIfTableRowCountNotAvailable(
-            List<LogicalOlapScan> scans,
+    public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
             CascadesContext context) {
         StatsCalculator calculator = new StatsCalculator(context);
+        if (ConnectContext.get() == null) {
+            // ut case
+            return Optional.empty();
+        }
         for (LogicalOlapScan scan : scans) {
             double rowCount = calculator.getOlapTableRowCount(scan);
-            if (rowCount == -1 && ConnectContext.get() != null) {
+            // row count not available
+            if (rowCount == -1) {
+                LOG.info("disable join reorder since row count not available: "
+                        + scan.getTable().getNameWithFullQualifiers());
+                return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid");
+            }
+            // ndv abnormal
+            Optional<String> reason = calculator.checkNdvValidation(scan, rowCount);
+            if (reason.isPresent()) {
                 try {
                     ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
-                    LOG.info("disable join reorder since row count not available: "
-                            + scan.getTable().getNameWithFullQualifiers());
+                    LOG.info("disable join reorder since col stats invalid: "
+                            + reason.get());
                 } catch (Exception e) {
                     LOG.info("disableNereidsJoinReorderOnce failed");
                 }
-                return;
+                return reason;
             }
         }
+        return Optional.empty();
     }
 }
diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
index 169c192a1e4..c57e7297838 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
@@ -1268,10 +1268,6 @@ sql """
 alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040', 'ndv'='5', 'min_value'='0', 'max_value'='4', 'avg_size'='8160', 'max_size'='8160' )
 """
 
-sql """
-alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' )
-"""
-
 sql """
 alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='7197670', 'ndv'='10429', 'min_value'='0.00', 'max_value'='13602.60', 'avg_size'='28790680', 'max_size'='28790680' )
 """
@@ -1989,7 +1985,7 @@ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'nd
 """
 
 sql """
-alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='30', 'avg_size'='120', 'max_size'='120' )
 """
 
 sql """


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to