This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e238a8705f2 [feat](nereids)disable join reorder if column stats is 
invalid (#41790)
e238a8705f2 is described below

commit e238a8705f25e2b5bc5a9e95b2025712f98dd682
Author: minghong <engle...@gmail.com>
AuthorDate: Mon Oct 28 09:48:30 2024 +0800

    [feat](nereids)disable join reorder if column stats is invalid (#41790)
    
    ## Proposed changes
    disable join reorder if any condition is matched:
    1. any table row count is -1
    2. any column, whose ndv is 0, but MinExpr or MaxExpr is not null
    3. ndv > 10* rowCount
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../org/apache/doris/nereids/NereidsPlanner.java   |   4 +-
 .../org/apache/doris/nereids/StatementContext.java |  10 ++
 .../doris/nereids/stats/StatsCalculator.java       |  46 +++++++--
 .../data/nereids_hint_tpcds_p0/shape/query64.out   | 101 +++++++++----------
 .../infer_predicate/infer_intersect_except.out     |  11 --
 .../suites/nereids_hint_tpcds_p0/load.groovy       |   2 +-
 .../suites/nereids_p0/stats/invalid_stats.groovy   | 111 +++++++++++++++++++++
 .../nereids_rules_p0/eager_aggregate/basic.groovy  |   4 +-
 .../eager_aggregate/basic_one_side.groovy          |   3 +-
 .../infer_predicate/infer_intersect_except.groovy  |  26 ++++-
 .../nereids_tpcds_shape_sf1000_p0/load.groovy      |   2 +-
 .../constraints/load.groovy                        |   2 +-
 .../nereids_tpcds_shape_sf100_p0/load.groovy       |   6 +-
 .../tpcds_sf100/constraints/load.groovy            |   2 +-
 .../suites/new_shapes_p0/tpcds_sf100/load.groovy   |   6 +-
 .../suites/new_shapes_p0/tpcds_sf1000/load.groovy  |   2 +-
 16 files changed, 247 insertions(+), 91 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 4acae7164f8..7ea92ee73b3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -274,7 +274,9 @@ public class NereidsPlanner extends Planner {
                 && !cascadesContext.isLeadingDisableJoinReorder()) {
             List<LogicalOlapScan> scans = cascadesContext.getRewritePlan()
                     .collectToList(LogicalOlapScan.class::isInstance);
-            
StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, 
cascadesContext);
+            Optional<String> disableJoinReorderReason = StatsCalculator
+                    .disableJoinReorderIfStatsInvalid(scans, cascadesContext);
+            
disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason);
         }
 
         optimize();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 08e1e3fa815..ed64864da50 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -169,6 +169,8 @@ public class StatementContext implements Closeable {
 
     private List<PlannerHook> plannerHooks = new ArrayList<>();
 
+    private String disableJoinReorderReason;
+
     public StatementContext() {
         this(ConnectContext.get(), null, 0);
     }
@@ -558,4 +560,12 @@ public class StatementContext implements Closeable {
         this.tableIdMapping.put(tableIdentifier, tableId);
         return tableId;
     }
+
+    public Optional<String> getDisableJoinReorderReason() {
+        return Optional.ofNullable(disableJoinReorderReason);
+    }
+
+    public void setDisableJoinReorderReason(String disableJoinReorderReason) {
+        this.disableJoinReorderReason = disableJoinReorderReason;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 2e159ff8b68..d4a9a81fc07 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -221,24 +221,40 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
     }
 
     /**
-     * disable join reorder if any table row count is not available.
+     * disable join reorder if
+     * 1. any table rowCount is not available, or
+     * 2. col stats ndv=0 but minExpr or maxExpr is not null
+     * 3. ndv > 10 * rowCount
      */
-    public static void disableJoinReorderIfTableRowCountNotAvailable(
-            List<LogicalOlapScan> scans, CascadesContext context) {
+    public static Optional<String> 
disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans,
+            CascadesContext context) {
         StatsCalculator calculator = new StatsCalculator(context);
+        if (ConnectContext.get() == null) {
+            // ut case
+            return Optional.empty();
+        }
         for (LogicalOlapScan scan : scans) {
             double rowCount = calculator.getOlapTableRowCount(scan);
-            if (rowCount == -1 && ConnectContext.get() != null) {
+            // row count not available
+            if (rowCount == -1) {
+                LOG.info("disable join reorder since row count not available: "
+                        + scan.getTable().getNameWithFullQualifiers());
+                return Optional.of("table[" + scan.getTable().getName() + "] 
row count is invalid");
+            }
+            // ndv abnormal
+            Optional<String> reason = calculator.checkNdvValidation(scan, 
rowCount);
+            if (reason.isPresent()) {
                 try {
                     
ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce();
-                    LOG.info("disable join reorder since row count not 
available: "
-                            + scan.getTable().getNameWithFullQualifiers());
+                    LOG.info("disable join reorder since col stats invalid: "
+                            + reason.get());
                 } catch (Exception e) {
                     LOG.info("disableNereidsJoinReorderOnce failed");
                 }
-                return;
+                return reason;
             }
         }
+        return Optional.empty();
     }
 
     /**
@@ -412,6 +428,22 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         return rowCount;
     }
 
+    // check validation of ndv.
+    private Optional<String> checkNdvValidation(OlapScan olapScan, double 
rowCount) {
+        for (Slot slot : ((Plan) olapScan).getOutput()) {
+            if (isVisibleSlotReference(slot)) {
+                ColumnStatistic cache = 
getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot);
+                if (!cache.isUnKnown) {
+                    if ((cache.ndv == 0 && (cache.minExpr != null || 
cache.maxExpr != null))
+                            || cache.ndv > rowCount * 10) {
+                        return Optional.of("slot " + slot.getName() + " has 
invalid column stats: " + cache);
+                    }
+                }
+            }
+        }
+        return Optional.empty();
+    }
+
     private Statistics computeOlapScan(OlapScan olapScan) {
         OlapTable olapTable = olapScan.getTable();
         double tableRowCount = getOlapTableRowCount(olapScan);
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out 
b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
index 1338812312e..26a67aa0d6e 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out
@@ -7,84 +7,85 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 --------PhysicalDistribute[DistributionSpecHash]
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN colocated] 
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and 
(store_sales.ss_ticket_number = store_returns.sr_ticket_number)) 
otherCondition=() build RFs:RF18 
sr_item_sk->[cr_item_sk,cs_item_sk,i_item_sk,ss_item_sk];RF19 
sr_ticket_number->[ss_ticket_number]
+--------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() 
build RFs:RF19 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) 
otherCondition=() build RFs:RF17 d_date_sk->[c_first_shipto_date_sk]
+------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF18 p_promo_sk->[ss_promo_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() 
build RFs:RF16 hd_demo_sk->[ss_hdemo_sk]
+----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF17 s_store_sk->[ss_store_sk]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF15 s_store_sk->[ss_store_sk]
+--------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() 
build RFs:RF16 d_date_sk->[ss_sold_date_sk]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF14 p_promo_sk->[ss_promo_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) 
otherCondition=() build RFs:RF15 ib_income_band_sk->[hd_income_band_sk]
 --------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) 
otherCondition=() build RFs:RF13 d_date_sk->[c_first_sales_date_sk]
+----------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) 
otherCondition=() build RFs:RF14 ca_address_sk->[c_current_addr_sk]
 ------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) 
otherCondition=() build RFs:RF12 ca_address_sk->[c_current_addr_sk]
+--------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) 
otherCondition=() build RFs:RF13 hd_demo_sk->[c_current_hdemo_sk]
 ----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() 
build RFs:RF11 ca_address_sk->[ss_addr_sk]
+------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() 
build RFs:RF12 ca_address_sk->[ss_addr_sk]
 --------------------------------------------PhysicalProject
-----------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) 
otherCondition=() build RFs:RF10 hd_demo_sk->[c_current_hdemo_sk]
+----------------------------------------------hashJoin[INNER_JOIN colocated] 
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and 
(store_sales.ss_ticket_number = store_returns.sr_ticket_number)) 
otherCondition=() build RFs:RF10 sr_item_sk->[i_item_sk,ss_item_sk];RF11 
sr_ticket_number->[ss_ticket_number]
 ------------------------------------------------PhysicalProject
---------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) 
otherCondition=() build RFs:RF9 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) 
otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk]
+--------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) 
otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk]
+----------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) 
otherCondition=() build RFs:RF8 i_item_sk->[ss_item_sk]
 ------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) 
otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF7 
cd_demo_sk->[c_current_cdemo_sk]
+--------------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) 
otherCondition=() build RFs:RF7 d_date_sk->[c_first_shipto_date_sk]
 ----------------------------------------------------------PhysicalProject
-------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((store_sales.ss_customer_sk = 
customer.c_customer_sk)) otherCondition=() build RFs:RF6 
c_customer_sk->[ss_customer_sk]
+------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) 
otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF6 
cd_demo_sk->[ss_cdemo_sk]
 --------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) 
otherCondition=() build RFs:RF5 cd_demo_sk->[ss_cdemo_sk]
-------------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) 
otherCondition=() build RFs:RF4 cs_item_sk->[ss_item_sk]
---------------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF4 RF5 RF6 RF8 RF9 RF11 RF14 RF15 RF16 RF18 RF19
---------------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------------filter((sale
 > (2 * refund)))
-------------------------------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------------------hashJoin[INNER_JOIN
 colocated] hashCondition=((catalog_sales.cs_item_sk = 
catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = 
catalog_returns.cr_order_number)) otherCondition=() build RFs:RF2 
cr_item_sk->[cs_item_sk];RF3 cr_order_number->[cs_order_number]
-----------------------------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF2 RF3 RF8 RF18
-----------------------------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns]
 apply RFs: RF8 RF18
+----------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((store_sales.ss_customer_sk = 
customer.c_customer_sk)) otherCondition=() build RFs:RF5 
c_customer_sk->[ss_customer_sk]
 
------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------PhysicalOlapScan[customer_demographics]
+--------------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF5 RF6 RF8 RF9 RF10 RF11 RF12 RF16 RF17 RF18 RF19
+------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) 
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+----------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------hashJoin[INNER_JOIN
 broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[c_first_sales_date_sk]
+--------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------PhysicalOlapScan[customer]
 apply RFs: RF3 RF4 RF7 RF13 RF14
+--------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------PhysicalOlapScan[customer_demographics]
 --------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[customer]
 apply RFs: RF7 RF10 RF12 RF13 RF17
+----------------------------------------------------------------PhysicalOlapScan[customer_demographics]
 ----------------------------------------------------------PhysicalProject
-------------------------------------------------------------PhysicalOlapScan[customer_demographics]
+------------------------------------------------------------PhysicalOlapScan[date_dim]
 ------------------------------------------------------PhysicalProject
 
--------------------------------------------------------filter((item.i_current_price
 <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 
'lawn', 'misty', 'orange', 'pink'))
-----------------------------------------------------------PhysicalOlapScan[item]
 apply RFs: RF18
+----------------------------------------------------------PhysicalOlapScan[item]
 apply RFs: RF10 RF19
 ----------------------------------------------------PhysicalProject
-------------------------------------------------------filter(d_year IN (1999, 
2000))
---------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) 
otherCondition=() build RFs:RF2 ib_income_band_sk->[hd_income_band_sk]
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------PhysicalOlapScan[household_demographics]
 apply RFs: RF2
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------PhysicalOlapScan[income_band]
 ------------------------------------------------PhysicalProject
---------------------------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) 
otherCondition=() build RFs:RF1 ib_income_band_sk->[hd_income_band_sk]
-----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[household_demographics]
 apply RFs: RF1
-----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[income_band]
+--------------------------------------------------PhysicalOlapScan[store_returns]
 apply RFs: RF19
 --------------------------------------------PhysicalProject
 
----------------------------------------------PhysicalOlapScan[customer_address]
 ----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[customer_address]
+------------------------------------------PhysicalOlapScan[household_demographics]
 apply RFs: RF15
 ------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------PhysicalOlapScan[customer_address]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[promotion]
+----------------------------------PhysicalOlapScan[income_band]
 ----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[store]
+------------------------------filter(d_year IN (1999, 2000))
+--------------------------------PhysicalOlapScan[date_dim]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) 
otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[household_demographics] apply 
RFs: RF0
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[income_band]
+--------------------------PhysicalOlapScan[store]
 --------------------PhysicalProject
-----------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalOlapScan[promotion]
 ----------------PhysicalProject
-------------------PhysicalOlapScan[store_returns]
+------------------filter((sale > (2 * refund)))
+--------------------hashAgg[GLOBAL]
+----------------------PhysicalDistribute[DistributionSpecHash]
+------------------------hashAgg[LOCAL]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN colocated] 
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and 
(catalog_sales.cs_order_number = catalog_returns.cr_order_number)) 
otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 
cr_order_number->[cs_order_number]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 
RF1
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_returns]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git 
a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
 
b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
index 2609ca5f4c9..d897514ee49 100644
--- 
a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
+++ 
b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out
@@ -52,17 +52,6 @@ PhysicalResultSink
 ----filter((cast(a as BIGINT) < -1))
 ------PhysicalOlapScan[infer_intersect_except2]
 
--- !except_and_intersect --
-PhysicalResultSink
---PhysicalExcept
-----filter((infer_intersect_except1.a > 0))
-------PhysicalOlapScan[infer_intersect_except1]
-----PhysicalIntersect
-------filter((infer_intersect_except2.b > 'ab'))
---------PhysicalOlapScan[infer_intersect_except2]
-------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b = 
'abc'))
---------PhysicalOlapScan[infer_intersect_except3]
-
 -- !except_and_intersect_except_predicate_to_right --
 PhysicalResultSink
 --PhysicalExcept
diff --git a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy 
b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
index 77b37a9c1b4..f4cf57113a6 100644
--- a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
+++ b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy
@@ -2336,7 +2336,7 @@ suite("load") {
     """
 
     sql """
-    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 
'max_value'='179769313', 'data_size'='168')
+    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
     """
 
     sql """
diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy 
b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
new file mode 100644
index 00000000000..5304cd8c2c1
--- /dev/null
+++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("invalid_stats") {
+    // multi_sql """
+    //     set global enable_auto_analyze=false;
+    //     SET enable_nereids_planner=true;
+    //     SET enable_fallback_to_original_planner=false;
+    //     set disable_nereids_rules=PRUNE_EMPTY_PARTITION;
+
+        
+    //     drop table if exists region;
+    //     CREATE TABLE region  (
+    //     r_regionkey      int NOT NULL,
+    //     r_name       VARCHAR(25) NOT NULL,
+    //     r_comment    VARCHAR(152)
+    //     )ENGINE=OLAP
+    //     DUPLICATE KEY(`r_regionkey`)
+    //     COMMENT "OLAP"
+    //     DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
+    //     PROPERTIES (
+    //         "replication_num" = "1"
+    //     );
+
+    //     drop table if exists nation;
+    //      CREATE TABLE `nation` (
+    //     `n_nationkey` int(11) NOT NULL,
+    //     `n_name`      varchar(25) NOT NULL,
+    //     `n_regionkey` int(11) NOT NULL,
+    //     `n_comment`   varchar(152) NULL
+    //     ) ENGINE=OLAP
+    //     DUPLICATE KEY(`N_NATIONKEY`)
+    //     COMMENT "OLAP"
+    //     DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
+    //     PROPERTIES (
+    //         "replication_num" = "1"
+    //     );
+    //     alter table nation modify column n_nationkey set stats ('ndv'='25', 
'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25');
+
+    //     alter table nation modify column n_regionkey set stats ('ndv'='5', 
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25');
+
+    // """
+
+    // explain {
+    //     sql "select * from region"
+    //     notContains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select * from region where r_regionkey=1"
+    //     contains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select r_regionkey from region group by r_regionkey"
+    //     contains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select r_regionkey from region join nation on 
r_regionkey=n_regionkey"
+    //     contains("join reorder with unknown column statistics")
+    // }
+
+    // sql "alter table region modify column r_regionkey set stats ('ndv'='5', 
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');"
+    
+    // explain {
+    //     sql "select * from region where r_regionkey=1"
+    //     notContains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select r_regionkey from region group by r_regionkey"
+    //     notContains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select r_regionkey from region join nation on 
r_regionkey=n_regionkey"
+    //     notContains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql "select r_name from region join nation on 
r_regionkey=n_regionkey"
+    //     notContains("join reorder with unknown column statistics")
+    // }
+
+    // explain {
+    //     sql """
+    //         select r_name 
+    //         from (select r_name, r_regionkey + 1 x from region) T join 
nation on T.x=n_regionkey
+    //         """
+    //     notContains("join reorder with unknown column statistics")
+    // }
+}
+// disable jo: alter table region modify column r_regionkey set stats 
('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0');
+// disable jo: alter table region modify column r_regionkey set stats 
('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 
'row_count'='1');
+
+// alter table region modify column r_regionkey set stats ('ndv'='10', 
'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1');
diff --git 
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy 
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
index 117d0c01f24..e37e9a65da1 100644
--- a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
+++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy
@@ -15,14 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("eager_aggregate_basic") {
+suite("basic") {
     sql "SET enable_nereids_planner=true"
     sql "set runtime_filter_mode=OFF"
     sql "SET enable_fallback_to_original_planner=false"
     sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
     
     sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
-
+    sql "set disable_join_reorder=true;"
     sql """
         DROP TABLE IF EXISTS shunt_log_com_dd_library;
     """
diff --git 
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy 
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
index f3e6f593aa9..78503039d22 100644
--- 
a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy
@@ -15,13 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("eager_aggregate_basic_one_side") {
+suite("basic_one_side") {
     sql "SET enable_nereids_planner=true"
     sql "set runtime_filter_mode=OFF"
     sql "SET enable_fallback_to_original_planner=false"
     sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
 
     sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
+    sql "set disable_join_reorder=true"
 
     sql """
         DROP TABLE IF EXISTS shunt_log_com_dd_library_one_side;
diff --git 
a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
 
b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
index fb8ef0a75f9..aec3f3384e4 100644
--- 
a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy
@@ -115,10 +115,28 @@ suite("infer_intersect_except") {
     select a+1,b from infer_intersect_except1 where a>0 intersect select a+1,b 
from infer_intersect_except2 where a+1<0;
     """
 
-    qt_except_and_intersect """
-    explain shape plan
-    select a,b from infer_intersect_except1 where a>0 except select 1,'abc' 
from infer_intersect_except2 where b>'ab' intersect select a,b from 
infer_intersect_except3 where a<10;
-    """
+    explain {
+        sql """
+            shape plan
+            select a,b from infer_intersect_except1 where a > 0
+            except
+            select 1, 'abc' from infer_intersect_except2 where b > 'ab'
+            intersect
+            select a, b from infer_intersect_except3 where a < 10;
+            """
+        notContains("a < 10")
+        contains("(infer_intersect_except3.a = 1) and 
(infer_intersect_except3.b = 'abc')")
+// PhysicalResultSink
+// --PhysicalExcept
+// ----filter((infer_intersect_except1.a > 0))
+// ------PhysicalOlapScan[infer_intersect_except1]
+// ----PhysicalIntersect
+// ------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b 
= 'abc'))
+// --------PhysicalOlapScan[infer_intersect_except3]
+// ------filter((infer_intersect_except2.b > 'ab'))
+// --------PhysicalOlapScan[infer_intersect_except2]
+    }
+
 
     qt_except_and_intersect_except_predicate_to_right """
     explain shape plan
diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy 
b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
index b868aab6130..1a5c3980e84 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy
@@ -2336,7 +2336,7 @@ suite("load") {
     """
 
     sql """
-    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 
'max_value'='179769313', 'data_size'='168')
+    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
     """
 
     sql """
diff --git 
a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy 
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
index 3020a285e63..b2dca961e4d 100644
--- 
a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
+++ 
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy
@@ -2340,7 +2340,7 @@ suite("load") {
     """
 
     sql """
-    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 
'max_value'='179769313', 'data_size'='168')
+    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
     """
 
     sql """
diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy 
b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
index 120ebf97298..48ad6bda53d 100644
--- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
+++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy
@@ -1299,7 +1299,7 @@ alter table web_page modify column wp_max_ad_count set 
stats ('row_count'='2040'
 """
 
 sql """
-alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 
'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120', 
'max_size'='120' )
 """
 
 sql """
@@ -2018,10 +2018,6 @@ sql """
 alter table ship_mode modify column sm_contract set stats ('row_count'='20', 
'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 
'avg_size'='252', 'max_size'='252' )
 """
 
-sql """
-alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 
'avg_size'='120', 'max_size'='120' )
-"""
-
 sql """
 alter table customer_address modify column ca_zip set stats 
('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981', 
'avg_size'='4848150', 'max_size'='4848150' )
 """
diff --git 
a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy 
b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
index 8b4fdeee69a..1ed3ebba10e 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy
@@ -2343,7 +2343,7 @@ suite("load") {
     """
 
     sql """
-    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 
'max_value'='179769313', 'data_size'='168')
+    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
     """
 
     sql """
diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy 
b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
index 23ce70d931e..4b99eafdea9 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy
@@ -1302,7 +1302,7 @@ alter table web_page modify column wp_max_ad_count set 
stats ('row_count'='2040'
 """
 
 sql """
-alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 
'avg_size'='120', 'max_size'='120' )
+alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120', 
'max_size'='120' )
 """
 
 sql """
@@ -2021,10 +2021,6 @@ sql """
 alter table ship_mode modify column sm_contract set stats ('row_count'='20', 
'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 
'avg_size'='252', 'max_size'='252' )
 """
 
-sql """
-alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 
'avg_size'='120', 'max_size'='120' )
-"""
-
 sql """
 alter table customer_address modify column ca_zip set stats 
('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981', 
'avg_size'='4848150', 'max_size'='4848150' )
 """
diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy 
b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
index bc28fdde8c0..14c11d3ea84 100644
--- a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
+++ b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy
@@ -2339,7 +2339,7 @@ suite("load") {
     """
 
     sql """
-    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 
'max_value'='179769313', 'data_size'='168')
+    alter table call_center modify column cc_closed_date_sk set stats 
('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168')
     """
 
     sql """


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to