This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e238a8705f2 [feat](nereids)disable join reorder if column stats is invalid (#41790) e238a8705f2 is described below commit e238a8705f25e2b5bc5a9e95b2025712f98dd682 Author: minghong <engle...@gmail.com> AuthorDate: Mon Oct 28 09:48:30 2024 +0800 [feat](nereids)disable join reorder if column stats is invalid (#41790) ## Proposed changes disable join reorder if any condition is matched: 1. any table row count is -1 2. any column, whose ndv is 0, but MinExpr or MaxExpr is not null 3. ndv > 10* rowCount Issue Number: close #xxx <!--Describe your changes.--> --- .../org/apache/doris/nereids/NereidsPlanner.java | 4 +- .../org/apache/doris/nereids/StatementContext.java | 10 ++ .../doris/nereids/stats/StatsCalculator.java | 46 +++++++-- .../data/nereids_hint_tpcds_p0/shape/query64.out | 101 +++++++++---------- .../infer_predicate/infer_intersect_except.out | 11 -- .../suites/nereids_hint_tpcds_p0/load.groovy | 2 +- .../suites/nereids_p0/stats/invalid_stats.groovy | 111 +++++++++++++++++++++ .../nereids_rules_p0/eager_aggregate/basic.groovy | 4 +- .../eager_aggregate/basic_one_side.groovy | 3 +- .../infer_predicate/infer_intersect_except.groovy | 26 ++++- .../nereids_tpcds_shape_sf1000_p0/load.groovy | 2 +- .../constraints/load.groovy | 2 +- .../nereids_tpcds_shape_sf100_p0/load.groovy | 6 +- .../tpcds_sf100/constraints/load.groovy | 2 +- .../suites/new_shapes_p0/tpcds_sf100/load.groovy | 6 +- .../suites/new_shapes_p0/tpcds_sf1000/load.groovy | 2 +- 16 files changed, 247 insertions(+), 91 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 4acae7164f8..7ea92ee73b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -274,7 +274,9 @@ public class NereidsPlanner extends Planner { && !cascadesContext.isLeadingDisableJoinReorder()) { List<LogicalOlapScan> scans = cascadesContext.getRewritePlan() .collectToList(LogicalOlapScan.class::isInstance); - StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext); + Optional<String> disableJoinReorderReason = StatsCalculator + .disableJoinReorderIfStatsInvalid(scans, cascadesContext); + disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason); } optimize(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 08e1e3fa815..ed64864da50 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -169,6 +169,8 @@ public class StatementContext implements Closeable { private List<PlannerHook> plannerHooks = new ArrayList<>(); + private String disableJoinReorderReason; + public StatementContext() { this(ConnectContext.get(), null, 0); } @@ -558,4 +560,12 @@ public class StatementContext implements Closeable { this.tableIdMapping.put(tableIdentifier, tableId); return tableId; } + + public Optional<String> getDisableJoinReorderReason() { + return Optional.ofNullable(disableJoinReorderReason); + } + + public void setDisableJoinReorderReason(String disableJoinReorderReason) { + this.disableJoinReorderReason = disableJoinReorderReason; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 2e159ff8b68..d4a9a81fc07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -221,24 +221,40 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } /** - * disable join reorder if any table row count is not available. + * disable join reorder if + * 1. any table rowCount is not available, or + * 2. col stats ndv=0 but minExpr or maxExpr is not null + * 3. ndv > 10 * rowCount */ - public static void disableJoinReorderIfTableRowCountNotAvailable( - List<LogicalOlapScan> scans, CascadesContext context) { + public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans, + CascadesContext context) { StatsCalculator calculator = new StatsCalculator(context); + if (ConnectContext.get() == null) { + // ut case + return Optional.empty(); + } for (LogicalOlapScan scan : scans) { double rowCount = calculator.getOlapTableRowCount(scan); - if (rowCount == -1 && ConnectContext.get() != null) { + // row count not available + if (rowCount == -1) { + LOG.info("disable join reorder since row count not available: " + + scan.getTable().getNameWithFullQualifiers()); + return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid"); + } + // ndv abnormal + Optional<String> reason = calculator.checkNdvValidation(scan, rowCount); + if (reason.isPresent()) { try { ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce(); - LOG.info("disable join reorder since row count not available: " - + scan.getTable().getNameWithFullQualifiers()); + LOG.info("disable join reorder since col stats invalid: " + + reason.get()); } catch (Exception e) { LOG.info("disableNereidsJoinReorderOnce failed"); } - return; + return reason; } } + return Optional.empty(); } /** @@ -412,6 +428,22 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { return rowCount; } + // check validation of ndv. + private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) { + for (Slot slot : ((Plan) olapScan).getOutput()) { + if (isVisibleSlotReference(slot)) { + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot); + if (!cache.isUnKnown) { + if ((cache.ndv == 0 && (cache.minExpr != null || cache.maxExpr != null)) + || cache.ndv > rowCount * 10) { + return Optional.of("slot " + slot.getName() + " has invalid column stats: " + cache); + } + } + } + } + return Optional.empty(); + } + private Statistics computeOlapScan(OlapScan olapScan) { OlapTable olapTable = olapScan.getTable(); double tableRowCount = getOlapTableRowCount(olapScan); diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out index 1338812312e..26a67aa0d6e 100644 --- a/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query64.out @@ -7,84 +7,85 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF18 sr_item_sk->[cr_item_sk,cs_item_sk,i_item_sk,ss_item_sk];RF19 sr_ticket_number->[ss_ticket_number] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF19 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF17 d_date_sk->[c_first_shipto_date_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF18 p_promo_sk->[ss_promo_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF16 hd_demo_sk->[ss_hdemo_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF17 s_store_sk->[ss_store_sk] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF15 s_store_sk->[ss_store_sk] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF16 d_date_sk->[ss_sold_date_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF14 p_promo_sk->[ss_promo_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF15 ib_income_band_sk->[hd_income_band_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF13 d_date_sk->[c_first_sales_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF14 ca_address_sk->[c_current_addr_sk] ------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF12 ca_address_sk->[c_current_addr_sk] +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[c_current_hdemo_sk] ----------------------------------------PhysicalProject -------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF11 ca_address_sk->[ss_addr_sk] +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF12 ca_address_sk->[ss_addr_sk] --------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF10 hd_demo_sk->[c_current_hdemo_sk] +----------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 sr_item_sk->[i_item_sk,ss_item_sk];RF11 sr_ticket_number->[ss_ticket_number] ------------------------------------------------PhysicalProject ---------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[ss_sold_date_sk] -----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] +--------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk] +----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[ss_item_sk] ------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF7 cd_demo_sk->[c_current_cdemo_sk] +--------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[c_first_shipto_date_sk] ----------------------------------------------------------PhysicalProject -------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk] +------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF6 cd_demo_sk->[ss_cdemo_sk] --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF5 cd_demo_sk->[ss_cdemo_sk] -------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[ss_item_sk] ---------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 RF6 RF8 RF9 RF11 RF14 RF15 RF16 RF18 RF19 ---------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------filter((sale > (2 * refund))) -------------------------------------------------------------------------hashAgg[GLOBAL] ---------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------------------------------------hashAgg[LOCAL] -------------------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF2 cr_item_sk->[cs_item_sk];RF3 cr_order_number->[cs_order_number] -----------------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 RF8 RF18 -----------------------------------------------------------------------------------PhysicalProject -------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF8 RF18 +----------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------PhysicalOlapScan[customer_demographics] +--------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF8 RF9 RF10 RF11 RF12 RF16 RF17 RF18 RF19 +------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk] +----------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[c_first_sales_date_sk] +--------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4 RF7 RF13 RF14 +--------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------PhysicalOlapScan[customer_demographics] --------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF7 RF10 RF12 RF13 RF17 +----------------------------------------------------------------PhysicalOlapScan[customer_demographics] ----------------------------------------------------------PhysicalProject -------------------------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------------------------PhysicalProject --------------------------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) -----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF18 +----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF19 ----------------------------------------------------PhysicalProject -------------------------------------------------------filter(d_year IN (1999, 2000)) ---------------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF2 ib_income_band_sk->[hd_income_band_sk] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF2 +--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[income_band] ------------------------------------------------PhysicalProject ---------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF1 ib_income_band_sk->[hd_income_band_sk] -----------------------------------------------------PhysicalProject -------------------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF1 -----------------------------------------------------PhysicalProject -------------------------------------------------------PhysicalOlapScan[income_band] +--------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF19 --------------------------------------------PhysicalProject ----------------------------------------------PhysicalOlapScan[customer_address] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[customer_address] +------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF15 ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------PhysicalOlapScan[customer_address] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[promotion] +----------------------------------PhysicalOlapScan[income_band] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store] +------------------------------filter(d_year IN (1999, 2000)) +--------------------------------PhysicalOlapScan[date_dim] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[income_band] +--------------------------PhysicalOlapScan[store] --------------------PhysicalProject -----------------------PhysicalOlapScan[date_dim] +----------------------PhysicalOlapScan[promotion] ----------------PhysicalProject -------------------PhysicalOlapScan[store_returns] +------------------filter((sale > (2 * refund))) +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN colocated] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_returns] --PhysicalResultSink ----PhysicalQuickSort[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out index 2609ca5f4c9..d897514ee49 100644 --- a/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out +++ b/regression-test/data/nereids_rules_p0/infer_predicate/infer_intersect_except.out @@ -52,17 +52,6 @@ PhysicalResultSink ----filter((cast(a as BIGINT) < -1)) ------PhysicalOlapScan[infer_intersect_except2] --- !except_and_intersect -- -PhysicalResultSink ---PhysicalExcept -----filter((infer_intersect_except1.a > 0)) -------PhysicalOlapScan[infer_intersect_except1] -----PhysicalIntersect -------filter((infer_intersect_except2.b > 'ab')) ---------PhysicalOlapScan[infer_intersect_except2] -------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b = 'abc')) ---------PhysicalOlapScan[infer_intersect_except3] - -- !except_and_intersect_except_predicate_to_right -- PhysicalResultSink --PhysicalExcept diff --git a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy index 77b37a9c1b4..f4cf57113a6 100644 --- a/regression-test/suites/nereids_hint_tpcds_p0/load.groovy +++ b/regression-test/suites/nereids_hint_tpcds_p0/load.groovy @@ -2336,7 +2336,7 @@ suite("load") { """ sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168') + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') """ sql """ diff --git a/regression-test/suites/nereids_p0/stats/invalid_stats.groovy b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy new file mode 100644 index 00000000000..5304cd8c2c1 --- /dev/null +++ b/regression-test/suites/nereids_p0/stats/invalid_stats.groovy @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("invalid_stats") { + // multi_sql """ + // set global enable_auto_analyze=false; + // SET enable_nereids_planner=true; + // SET enable_fallback_to_original_planner=false; + // set disable_nereids_rules=PRUNE_EMPTY_PARTITION; + + + // drop table if exists region; + // CREATE TABLE region ( + // r_regionkey int NOT NULL, + // r_name VARCHAR(25) NOT NULL, + // r_comment VARCHAR(152) + // )ENGINE=OLAP + // DUPLICATE KEY(`r_regionkey`) + // COMMENT "OLAP" + // DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + // PROPERTIES ( + // "replication_num" = "1" + // ); + + // drop table if exists nation; + // CREATE TABLE `nation` ( + // `n_nationkey` int(11) NOT NULL, + // `n_name` varchar(25) NOT NULL, + // `n_regionkey` int(11) NOT NULL, + // `n_comment` varchar(152) NULL + // ) ENGINE=OLAP + // DUPLICATE KEY(`N_NATIONKEY`) + // COMMENT "OLAP" + // DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1 + // PROPERTIES ( + // "replication_num" = "1" + // ); + // alter table nation modify column n_nationkey set stats ('ndv'='25', 'num_nulls'='0', 'min_value'='0', 'max_value'='24', 'row_count'='25'); + + // alter table nation modify column n_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='25'); + + // """ + + // explain { + // sql "select * from region" + // notContains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select * from region where r_regionkey=1" + // contains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select r_regionkey from region group by r_regionkey" + // contains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" + // contains("join reorder with unknown column statistics") + // } + + // sql "alter table region modify column r_regionkey set stats ('ndv'='5', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='5');" + + // explain { + // sql "select * from region where r_regionkey=1" + // notContains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select r_regionkey from region group by r_regionkey" + // notContains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select r_regionkey from region join nation on r_regionkey=n_regionkey" + // notContains("join reorder with unknown column statistics") + // } + + // explain { + // sql "select r_name from region join nation on r_regionkey=n_regionkey" + // notContains("join reorder with unknown column statistics") + // } + + // explain { + // sql """ + // select r_name + // from (select r_name, r_regionkey + 1 x from region) T join nation on T.x=n_regionkey + // """ + // notContains("join reorder with unknown column statistics") + // } +} +// disable jo: alter table region modify column r_regionkey set stats ('ndv'='0', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='0'); +// disable jo: alter table region modify column r_regionkey set stats ('ndv'='11', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1'); + +// alter table region modify column r_regionkey set stats ('ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='4', 'row_count'='1'); diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy index 117d0c01f24..e37e9a65da1 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic.groovy @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -suite("eager_aggregate_basic") { +suite("basic") { sql "SET enable_nereids_planner=true" sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" - + sql "set disable_join_reorder=true;" sql """ DROP TABLE IF EXISTS shunt_log_com_dd_library; """ diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy index f3e6f593aa9..78503039d22 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/basic_one_side.groovy @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -suite("eager_aggregate_basic_one_side") { +suite("basic_one_side") { sql "SET enable_nereids_planner=true" sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + sql "set disable_join_reorder=true" sql """ DROP TABLE IF EXISTS shunt_log_com_dd_library_one_side; diff --git a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy index fb8ef0a75f9..aec3f3384e4 100644 --- a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy +++ b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_intersect_except.groovy @@ -115,10 +115,28 @@ suite("infer_intersect_except") { select a+1,b from infer_intersect_except1 where a>0 intersect select a+1,b from infer_intersect_except2 where a+1<0; """ - qt_except_and_intersect """ - explain shape plan - select a,b from infer_intersect_except1 where a>0 except select 1,'abc' from infer_intersect_except2 where b>'ab' intersect select a,b from infer_intersect_except3 where a<10; - """ + explain { + sql """ + shape plan + select a,b from infer_intersect_except1 where a > 0 + except + select 1, 'abc' from infer_intersect_except2 where b > 'ab' + intersect + select a, b from infer_intersect_except3 where a < 10; + """ + notContains("a < 10") + contains("(infer_intersect_except3.a = 1) and (infer_intersect_except3.b = 'abc')") +// PhysicalResultSink +// --PhysicalExcept +// ----filter((infer_intersect_except1.a > 0)) +// ------PhysicalOlapScan[infer_intersect_except1] +// ----PhysicalIntersect +// ------filter((infer_intersect_except3.a = 1) and (infer_intersect_except3.b = 'abc')) +// --------PhysicalOlapScan[infer_intersect_except3] +// ------filter((infer_intersect_except2.b > 'ab')) +// --------PhysicalOlapScan[infer_intersect_except2] + } + qt_except_and_intersect_except_predicate_to_right """ explain shape plan diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy index b868aab6130..1a5c3980e84 100644 --- a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/load.groovy @@ -2336,7 +2336,7 @@ suite("load") { """ sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168') + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') """ sql """ diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy index 3020a285e63..b2dca961e4d 100644 --- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy +++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/constraints/load.groovy @@ -2340,7 +2340,7 @@ suite("load") { """ sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168') + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') """ sql """ diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy index 120ebf97298..48ad6bda53d 100644 --- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy +++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy @@ -1299,7 +1299,7 @@ alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040' """ sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' ) +alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120', 'max_size'='120' ) """ sql """ @@ -2018,10 +2018,6 @@ sql """ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'avg_size'='252', 'max_size'='252' ) """ -sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' ) -""" - sql """ alter table customer_address modify column ca_zip set stats ('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981', 'avg_size'='4848150', 'max_size'='4848150' ) """ diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy index 8b4fdeee69a..1ed3ebba10e 100644 --- a/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy +++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/constraints/load.groovy @@ -2343,7 +2343,7 @@ suite("load") { """ sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168') + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') """ sql """ diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy index 23ce70d931e..4b99eafdea9 100644 --- a/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy +++ b/regression-test/suites/new_shapes_p0/tpcds_sf100/load.groovy @@ -1302,7 +1302,7 @@ alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040' """ sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' ) +alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='42', 'avg_size'='120', 'max_size'='120' ) """ sql """ @@ -2021,10 +2021,6 @@ sql """ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'avg_size'='252', 'max_size'='252' ) """ -sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' ) -""" - sql """ alter table customer_address modify column ca_zip set stats ('row_count'='1000000', 'ndv'='7733', 'min_value'='', 'max_value'='99981', 'avg_size'='4848150', 'max_size'='4848150' ) """ diff --git a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy index bc28fdde8c0..14c11d3ea84 100644 --- a/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy +++ b/regression-test/suites/new_shapes_p0/tpcds_sf1000/load.groovy @@ -2339,7 +2339,7 @@ suite("load") { """ sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'min_value'='0', 'max_value'='179769313', 'data_size'='168') + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') """ sql """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org