This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new b136c44584f [feat](nereids)disable join reorder if column stats is invalid #41790 (branch-2.0) (#42913) b136c44584f is described below commit b136c44584f2f6b2f167b3e94bbcbe660ec76230 Author: minghong <engle...@gmail.com> AuthorDate: Fri Nov 1 11:47:49 2024 +0800 [feat](nereids)disable join reorder if column stats is invalid #41790 (branch-2.0) (#42913) ## Proposed changes pick #41790 Issue Number: close #xxx <!--Describe your changes.--> --- .../org/apache/doris/nereids/NereidsPlanner.java | 3 +- .../doris/nereids/stats/StatsCalculator.java | 65 +++++++++++++++++++--- .../nereids_tpcds_shape_sf100_p0/load.groovy | 6 +- 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 150780d95e1..24550f4f724 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -268,7 +268,8 @@ public class NereidsPlanner extends Planner { if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest && cascadesContext.isLeadingJoin()) { List<LogicalOlapScan> scans = cascadesContext.getRewritePlan() .collectToList(LogicalOlapScan.class::isInstance); - StatsCalculator.disableJoinReorderIfTableRowCountNotAvailable(scans, cascadesContext); + Optional<String> reason = StatsCalculator.disableJoinReorderIfStatsInvalid(scans, cascadesContext); + reason.ifPresent(LOG::info); } optimize(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 9458407295d..1fc3708545e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.stats; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; @@ -141,6 +142,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -1159,25 +1161,74 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { return rowCount; } + private boolean isVisibleSlotReference(Slot slot) { + if (slot instanceof SlotReference) { + Optional<Column> colOpt = ((SlotReference) slot).getColumn(); + if (colOpt.isPresent()) { + return colOpt.get().isVisible(); + } + } + return false; + } + + private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) { + long idxId = -1; + if (catalogRelation instanceof OlapScan) { + idxId = ((OlapScan) catalogRelation).getSelectedIndexId(); + } + return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId); + } + + // check validation of ndv. + private Optional<String> checkNdvValidation(OlapScan olapScan, double rowCount) { + for (Slot slot : ((Plan) olapScan).getOutput()) { + if (isVisibleSlotReference(slot)) { + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, (SlotReference) slot); + if (!cache.isUnKnown) { + if ((cache.ndv == 0 && (cache.minExpr != null || cache.maxExpr != null)) + || cache.ndv > rowCount * 10) { + return Optional.of("slot " + slot.getName() + " has invalid column stats: " + cache); + } + } + } + } + return Optional.empty(); + } + /** - * disable join reorder if any table row count is not available. + * disable join reorder if + * 1. any table rowCount is not available, or + * 2. col stats ndv=0 but minExpr or maxExpr is not null + * 3. ndv > 10 * rowCount */ - public static void disableJoinReorderIfTableRowCountNotAvailable( - List<LogicalOlapScan> scans, + public static Optional<String> disableJoinReorderIfStatsInvalid(List<LogicalOlapScan> scans, CascadesContext context) { StatsCalculator calculator = new StatsCalculator(context); + if (ConnectContext.get() == null) { + // ut case + return Optional.empty(); + } for (LogicalOlapScan scan : scans) { double rowCount = calculator.getOlapTableRowCount(scan); - if (rowCount == -1 && ConnectContext.get() != null) { + // row count not available + if (rowCount == -1) { + LOG.info("disable join reorder since row count not available: " + + scan.getTable().getNameWithFullQualifiers()); + return Optional.of("table[" + scan.getTable().getName() + "] row count is invalid"); + } + // ndv abnormal + Optional<String> reason = calculator.checkNdvValidation(scan, rowCount); + if (reason.isPresent()) { try { ConnectContext.get().getSessionVariable().disableNereidsJoinReorderOnce(); - LOG.info("disable join reorder since row count not available: " - + scan.getTable().getNameWithFullQualifiers()); + LOG.info("disable join reorder since col stats invalid: " + + reason.get()); } catch (Exception e) { LOG.info("disableNereidsJoinReorderOnce failed"); } - return; + return reason; } } + return Optional.empty(); } } diff --git a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy index 169c192a1e4..c57e7297838 100644 --- a/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy +++ b/regression-test/suites/nereids_tpcds_shape_sf100_p0/load.groovy @@ -1268,10 +1268,6 @@ sql """ alter table web_page modify column wp_max_ad_count set stats ('row_count'='2040', 'ndv'='5', 'min_value'='0', 'max_value'='4', 'avg_size'='8160', 'max_size'='8160' ) """ -sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='2415022', 'max_value'='2488070', 'avg_size'='120', 'max_size'='120' ) -""" - sql """ alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='7197670', 'ndv'='10429', 'min_value'='0.00', 'max_value'='13602.60', 'avg_size'='28790680', 'max_size'='28790680' ) """ @@ -1989,7 +1985,7 @@ alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'nd """ sql """ -alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'min_value'='0', 'max_value'='0', 'avg_size'='120', 'max_size'='120' ) +alter table call_center modify column cc_closed_date_sk set stats ('row_count'='30', 'ndv'='0', 'num_nulls'='30', 'avg_size'='120', 'max_size'='120' ) """ sql """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org