This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new bc400256317 [opt](Nereids)Join cluster connectivity (#27833) bc400256317 is described below commit bc4002563175d1a24f3a1b5ac8ae7ddd4eb4df49 Author: minghong <engle...@gmail.com> AuthorDate: Fri Dec 8 14:55:10 2023 +0800 [opt](Nereids)Join cluster connectivity (#27833) * estimation join stats by connectivity --- .../java/org/apache/doris/nereids/PlanContext.java | 4 ++ .../org/apache/doris/nereids/StatementContext.java | 12 ++++++ .../org/apache/doris/nereids/cost/CostModelV1.java | 28 ++++++++++++ .../rules/exploration/join/JoinCommute.java | 2 +- .../doris/nereids/rules/rewrite/ReorderJoin.java | 1 + .../org/apache/doris/nereids/memo/RankTest.java | 2 +- .../predicate_infer/infer_predicate.out | 29 +++++++------ .../noStatsRfPrune/query14.out | 8 ++-- .../noStatsRfPrune/query16.out | 8 ++-- .../noStatsRfPrune/query31.out | 8 ++-- .../noStatsRfPrune/query58.out | 24 +++++------ .../noStatsRfPrune/query59.out | 35 ++++++++------- .../noStatsRfPrune/query94.out | 8 ++-- .../noStatsRfPrune/query95.out | 50 +++++++++++----------- .../no_stats_shape/query14.out | 8 ++-- .../no_stats_shape/query16.out | 10 ++--- .../no_stats_shape/query31.out | 8 ++-- .../no_stats_shape/query58.out | 30 ++++++------- .../no_stats_shape/query59.out | 35 ++++++++------- .../no_stats_shape/query94.out | 10 ++--- .../no_stats_shape/query95.out | 48 ++++++++++----------- .../nostats_rf_prune/q21.out | 22 +++++----- .../shape_no_stats/q21.out | 22 +++++----- 23 files changed, 228 insertions(+), 184 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java index f560dabf8b3..3ab95423e24 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java @@ -88,4 +88,8 @@ public class PlanContext { public List<Statistics> getChildrenStatistics() { return childrenStats; } + + public StatementContext getStatementContext() { + return connectContext.getStatementContext(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index da81698a8b3..33ef6997835 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -43,6 +43,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -88,6 +89,9 @@ public class StatementContext { private final Map<CTEId, LogicalPlan> rewrittenCteConsumer = new HashMap<>(); private final Set<String> viewDdlSqlSet = Sets.newHashSet(); + // collect all hash join conditions to compute node connectivity in join graph + private final List<Expression> joinFilters = new ArrayList<>(); + private final List<Hint> hints = new ArrayList<>(); public StatementContext() { @@ -242,4 +246,12 @@ public class StatementContext { public List<Hint> getHints() { return ImmutableList.copyOf(hints); } + + public List<Expression> getJoinFilters() { + return joinFilters; + } + + public void addJoinFilters(Collection<Expression> newJoinFilters) { + this.joinFilters.addAll(newJoinFilters); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index 1ff398cce09..7704c9630fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.DistributionSpec; import org.apache.doris.nereids.properties.DistributionSpecGather; import org.apache.doris.nereids.properties.DistributionSpecHash; import org.apache.doris.nereids.properties.DistributionSpecReplicated; +import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows; import org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan; @@ -48,6 +49,8 @@ import org.apache.doris.statistics.Statistics; import com.google.common.base.Preconditions; +import java.util.Collections; + class CostModelV1 extends PlanVisitor<Cost, PlanContext> { // for a join, skew = leftRowCount/rightRowCount @@ -262,6 +265,17 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> { double leftRowCount = probeStats.getRowCount(); double rightRowCount = buildStats.getRowCount(); + if (leftRowCount == rightRowCount + && physicalHashJoin.getGroupExpression().isPresent() + && physicalHashJoin.getGroupExpression().get().getOwnerGroup() != null + && !physicalHashJoin.getGroupExpression().get().getOwnerGroup().isStatsReliable()) { + int leftConnectivity = computeConnectivity(physicalHashJoin.left(), context); + int rightConnectivity = computeConnectivity(physicalHashJoin.right(), context); + if (rightConnectivity < leftConnectivity) { + leftRowCount += 1; + } + } + /* pattern1: L join1 (Agg1() join2 Agg2()) result number of join2 may much less than Agg1. @@ -310,6 +324,20 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> { ); } + /* + in a join cluster graph, if a node has higher connectivity, it is more likely to be reduced + by runtime filters, and it is also more likely to produce effective runtime filters. + Thus, we prefer to put the node with higher connectivity on the join right side. + */ + private int computeConnectivity( + Plan plan, PlanContext context) { + int connectCount = 0; + for (Expression expr : context.getStatementContext().getJoinFilters()) { + connectCount += Collections.disjoint(expr.getInputSlots(), plan.getOutputSet()) ? 0 : 1; + } + return connectCount; + } + @Override public Cost visitPhysicalNestedLoopJoin( PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> nestedLoopJoin, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinCommute.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinCommute.java index d6df03e1c0b..efdd46f821f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinCommute.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinCommute.java @@ -93,7 +93,7 @@ public class JoinCommute extends OneExplorationRuleFactory { if (swapType == SwapType.LEFT_ZIG_ZAG) { double leftRows = join.left().getGroup().getStatistics().getRowCount(); double rightRows = join.right().getGroup().getStatistics().getRowCount(); - return leftRows < rightRows && isZigZagJoin(join); + return leftRows <= rightRows && isZigZagJoin(join); } return true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java index a9e685b1c89..31b0dc904ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java @@ -87,6 +87,7 @@ public class ReorderJoin extends OneRewriteRuleFactory { Plan plan = joinToMultiJoin(filter, planToHintType); Preconditions.checkState(plan instanceof MultiJoin); MultiJoin multiJoin = (MultiJoin) plan; + ctx.statementContext.addJoinFilters(multiJoin.getJoinFilter()); ctx.statementContext.setMaxNAryInnerJoin(multiJoin.children().size()); Plan after = multiJoinToJoin(multiJoin, planToHintType); return after; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java index e3571395e47..57773bb0c4f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java @@ -55,7 +55,7 @@ class RankTest extends TestWithFeService { shape.add(memo.unrank(memo.rank(i + 1).first).shape("")); } System.out.println(shape); - Assertions.assertEquals(1, shape.size()); + Assertions.assertEquals(2, shape.size()); Assertions.assertEquals(bestPlan.shape(""), memo.unrank(memo.rank(1).first).shape("")); } } diff --git a/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out b/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out index c106bb9407a..bed06fa4848 100644 --- a/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out +++ b/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out @@ -465,21 +465,22 @@ PhysicalResultSink -- !infer5 -- PhysicalResultSink ---PhysicalProject -----hashJoin[INNER_JOIN] hashCondition=((t1.id = t3.id)) otherCondition=() build RFs:RF1 id->[id] -------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] ---------filter((t1.id = 1)) -----------PhysicalLimit[GLOBAL] -------------PhysicalDistribute ---------------PhysicalLimit[LOCAL] -----------------PhysicalProject -------------------PhysicalOlapScan[t1] apply RFs: RF0 RF1 ---------PhysicalDistribute +--PhysicalDistribute +----PhysicalProject +------hashJoin[INNER_JOIN] hashCondition=((t1.id = t3.id)) otherCondition=() build RFs:RF1 id->[id] +--------hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() build RFs:RF0 id->[id] ----------filter((t2.id = 1)) -------------PhysicalOlapScan[t2] -------PhysicalDistribute ---------filter((t3.id = 1)) -----------PhysicalOlapScan[t] +------------PhysicalOlapScan[t2] apply RFs: RF0 +----------PhysicalDistribute +------------filter((t1.id = 1)) +--------------PhysicalLimit[GLOBAL] +----------------PhysicalDistribute +------------------PhysicalLimit[LOCAL] +--------------------PhysicalProject +----------------------PhysicalOlapScan[t1] apply RFs: RF1 +--------PhysicalDistribute +----------filter((t3.id = 1)) +------------PhysicalOlapScan[t] -- !infer6 -- PhysicalResultSink diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out index 619c5f69f02..ad7cd03a059 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query14.out @@ -157,12 +157,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF20 d_date_sk->[ws_sold_date_sk] ----------------------------------------PhysicalProject ------------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[ws_item_sk] ---------------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF18 ss_item_sk->[ws_item_sk] -----------------------------------------------PhysicalDistribute -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF18 RF19 RF20 +--------------------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF18 ws_item_sk->[ss_item_sk] ----------------------------------------------PhysicalDistribute ------------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------------------------PhysicalDistribute +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF19 RF20 --------------------------------------------PhysicalDistribute ----------------------------------------------PhysicalProject ------------------------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out index 2fac57e7525..b08f9d62730 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out @@ -11,7 +11,10 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() --------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] +------------------------PhysicalDistribute +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ------------------------PhysicalDistribute --------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] ----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] @@ -25,9 +28,6 @@ PhysicalResultSink ------------------------------PhysicalProject --------------------------------filter((customer_address.ca_state = 'WV')) ----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[catalog_sales] ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out index 67bf996d44b..b99861d990f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out @@ -46,6 +46,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------hashJoin[INNER_JOIN] hashCondition=((ss1.ca_county = ws1.ca_county)) otherCondition=() ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((ss2.ca_county = ss3.ca_county)) otherCondition=() +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((ss3.d_qoy = 3) and (ss3.d_year = 2000)) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------------hashJoin[INNER_JOIN] hashCondition=((ss1.ca_county = ss2.ca_county)) otherCondition=() ----------------------------PhysicalDistribute ------------------------------PhysicalProject @@ -55,10 +59,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------PhysicalProject --------------------------------filter((ss2.d_qoy = 2) and (ss2.d_year = 2000)) ----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------filter((ss3.d_qoy = 3) and (ss3.d_year = 2000)) ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter((ws1.d_qoy = 1) and (ws1.d_year = 2000)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out index 080d5b2afea..1389ea6996c 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query58.out @@ -24,10 +24,7 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[item] apply RFs: RF12 RF13 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF9 d_date->[d_date] -------------------------------------PhysicalDistribute ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9 +----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() ------------------------------------PhysicalDistribute --------------------------------------PhysicalProject ----------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF8 d_week_seq->[d_week_seq] @@ -40,6 +37,9 @@ PhysicalResultSink ------------------------------------------------PhysicalProject --------------------------------------------------filter((date_dim.d_date = '2001-03-24')) ----------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalDistribute +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] ------------------PhysicalProject --------------------hashAgg[GLOBAL] ----------------------PhysicalDistribute @@ -55,10 +55,7 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[item] ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] -------------------------------------PhysicalDistribute ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 +----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() ------------------------------------PhysicalDistribute --------------------------------------PhysicalProject ----------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] @@ -71,6 +68,9 @@ PhysicalResultSink ------------------------------------------------PhysicalProject --------------------------------------------------filter((date_dim.d_date = '2001-03-24')) ----------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalDistribute +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] --------------PhysicalProject ----------------hashAgg[GLOBAL] ------------------PhysicalDistribute @@ -86,10 +86,7 @@ PhysicalResultSink ----------------------------------PhysicalOlapScan[item] --------------------------PhysicalDistribute ----------------------------PhysicalProject -------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() --------------------------------PhysicalDistribute ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] @@ -102,4 +99,7 @@ PhysicalResultSink --------------------------------------------PhysicalProject ----------------------------------------------filter((date_dim.d_date = '2001-03-24')) ------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out index 5c2a0e7a195..d731b71a900 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out @@ -18,31 +18,30 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute ----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF5 d_week_seq->[d_week_seq] +--------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk) and (y.s_store_id1 = x.s_store_id2)) otherCondition=() build RFs:RF4 s_store_id2->[s_store_id];RF5 s_store_sk->[ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk) and (y.s_store_id1 = x.s_store_id2)) otherCondition=() build RFs:RF3 s_store_id1->[s_store_id];RF4 s_store_sk->[ss_store_sk] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF1 d_week_seq->[d_week_seq] ---------------------------hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(d_week_seq2 - 52))) otherCondition=() -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +--------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF2 d_week_seq->[d_week_seq] +----------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF1 d_week_seq->[d_week_seq] +------------------------hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(d_week_seq2 - 52))) otherCondition=() --------------------------PhysicalDistribute ----------------------------PhysicalProject -------------------------------filter((d.d_month_seq <= 1219) and (d.d_month_seq >= 1208)) ---------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] apply RFs: RF3 +----------------------------filter((d.d_month_seq <= 1219) and (d.d_month_seq >= 1208)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter((d.d_month_seq <= 1207) and (d.d_month_seq >= 1196)) +----------------------------PhysicalOlapScan[date_dim] --------------------PhysicalDistribute ----------------------PhysicalProject -------------------------PhysicalOlapScan[store] +------------------------PhysicalOlapScan[store] apply RFs: RF4 ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((d.d_month_seq <= 1207) and (d.d_month_seq >= 1196)) -----------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out index 43482826444..a7f921c597b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out @@ -11,7 +11,10 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() --------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] -----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] +------------------------PhysicalDistribute +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 ------------------------PhysicalDistribute --------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] ----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] @@ -25,9 +28,6 @@ PhysicalResultSink ------------------------------PhysicalProject --------------------------------filter((customer_address.ca_state = 'OK')) ----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[web_sales] ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter((web_site.web_company_name = 'pri')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out index 47884ed767d..62afa403979 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out @@ -3,13 +3,13 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) ----PhysicalProject -------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number] +------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) --------PhysicalDistribute ----------PhysicalProject -------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF6 +------------PhysicalOlapScan[web_sales] --------PhysicalDistribute ----------PhysicalProject -------------PhysicalOlapScan[web_sales] apply RFs: RF6 +------------PhysicalOlapScan[web_sales] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalTopN[LOCAL_SORT] @@ -19,14 +19,25 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------hashAgg[GLOBAL] ----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF5 web_site_sk->[ws_web_site_sk] -------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF4 ws_order_number->[ws_order_number] ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] +--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF5 ws_order_number->[wr_order_number] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF4 wr_order_number->[ws_order_number] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_returns] apply RFs: RF5 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[ws_ship_addr_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 RF4 RF5 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 RF2 RF3 --------------------------------PhysicalDistribute ----------------------------------PhysicalProject ------------------------------------filter((date_dim.d_date <= '1999-04-02') and (date_dim.d_date >= '1999-02-01')) @@ -35,19 +46,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------filter((customer_address.ca_state = 'NC')) ------------------------------------PhysicalOlapScan[customer_address] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter((web_site.web_company_name = 'pri')) -------------------------------PhysicalOlapScan[web_site] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF1 wr_order_number->[ws_order_number];RF6 wr_order_number->[ws_order_number,ws_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_returns] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((web_site.web_company_name = 'pri')) +----------------------------------PhysicalOlapScan[web_site] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out index ce01e32c216..5aebc906be6 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query14.out @@ -157,12 +157,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF20 d_date_sk->[ws_sold_date_sk] ----------------------------------------PhysicalProject ------------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[ws_item_sk] ---------------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF18 ss_item_sk->[ws_item_sk] -----------------------------------------------PhysicalDistribute -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF18 RF19 RF20 +--------------------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF18 ws_item_sk->[ss_item_sk] ----------------------------------------------PhysicalDistribute ------------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------------------------PhysicalDistribute +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF19 RF20 --------------------------------------------PhysicalDistribute ----------------------------------------------PhysicalProject ------------------------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out index d6737a1cd78..b08f9d62730 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out @@ -11,12 +11,15 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() --------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] +------------------------PhysicalDistribute +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ------------------------PhysicalDistribute --------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] ----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 RF3 +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 ------------------------------PhysicalDistribute --------------------------------PhysicalProject ----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) @@ -25,9 +28,6 @@ PhysicalResultSink ------------------------------PhysicalProject --------------------------------filter((customer_address.ca_state = 'WV')) ----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[catalog_sales] ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out index 299fb8974d7..281bc604c6b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out @@ -46,6 +46,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------hashJoin[INNER_JOIN] hashCondition=((ss1.ca_county = ws1.ca_county)) otherCondition=() ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((ss2.ca_county = ss3.ca_county)) otherCondition=() +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((ss3.d_qoy = 3) and (ss3.d_year = 2000)) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------------hashJoin[INNER_JOIN] hashCondition=((ss1.ca_county = ss2.ca_county)) otherCondition=() ----------------------------PhysicalDistribute ------------------------------PhysicalProject @@ -55,10 +59,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------PhysicalProject --------------------------------filter((ss2.d_qoy = 2) and (ss2.d_year = 2000)) ----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------filter((ss3.d_qoy = 3) and (ss3.d_year = 2000)) ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter((ws1.d_qoy = 1) and (ws1.d_year = 2000)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out index 0e27975e186..e54081b840d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query58.out @@ -24,22 +24,22 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[item] apply RFs: RF12 RF13 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF9 d_date->[d_date] -------------------------------------PhysicalDistribute ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9 +----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF9 d_date->[d_date] ------------------------------------PhysicalDistribute --------------------------------------PhysicalProject ----------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF8 d_week_seq->[d_week_seq] ------------------------------------------PhysicalDistribute --------------------------------------------PhysicalProject -----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF8 +----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF8 RF9 ------------------------------------------PhysicalDistribute --------------------------------------------PhysicalAssertNumRows ----------------------------------------------PhysicalDistribute ------------------------------------------------PhysicalProject --------------------------------------------------filter((date_dim.d_date = '2001-03-24')) ----------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalDistribute +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] ------------------PhysicalProject --------------------hashAgg[GLOBAL] ----------------------PhysicalDistribute @@ -55,22 +55,22 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[item] ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] -------------------------------------PhysicalDistribute ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 +----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] ------------------------------------PhysicalDistribute --------------------------------------PhysicalProject ----------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] ------------------------------------------PhysicalDistribute --------------------------------------------PhysicalProject -----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 +----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 RF5 ------------------------------------------PhysicalDistribute --------------------------------------------PhysicalAssertNumRows ----------------------------------------------PhysicalDistribute ------------------------------------------------PhysicalProject --------------------------------------------------filter((date_dim.d_date = '2001-03-24')) ----------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalDistribute +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] --------------PhysicalProject ----------------hashAgg[GLOBAL] ------------------PhysicalDistribute @@ -86,20 +86,20 @@ PhysicalResultSink ----------------------------------PhysicalOlapScan[item] --------------------------PhysicalDistribute ----------------------------PhysicalProject -------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] --------------------------------PhysicalDistribute ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] --------------------------------------PhysicalDistribute ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 +------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 RF1 --------------------------------------PhysicalDistribute ----------------------------------------PhysicalAssertNumRows ------------------------------------------PhysicalDistribute --------------------------------------------PhysicalProject ----------------------------------------------filter((date_dim.d_date = '2001-03-24')) ------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out index ebc8191498f..65458d6bf5d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out @@ -18,31 +18,30 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute ----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF5 d_week_seq->[d_week_seq] +--------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk) and (y.s_store_id1 = x.s_store_id2)) otherCondition=() build RFs:RF4 s_store_id2->[s_store_id];RF5 s_store_sk->[ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk) and (y.s_store_id1 = x.s_store_id2)) otherCondition=() build RFs:RF3 s_store_id1->[s_store_id];RF4 s_store_sk->[ss_store_sk] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF1 d_week_seq->[d_week_seq] ---------------------------hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(d_week_seq2 - 52))) otherCondition=() -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------hashJoin[INNER_JOIN] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +--------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF2 d_week_seq->[d_week_seq] +----------------------hashJoin[INNER_JOIN] hashCondition=((d.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF1 d_week_seq->[d_week_seq] +------------------------hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(d_week_seq2 - 52))) otherCondition=() --------------------------PhysicalDistribute ----------------------------PhysicalProject -------------------------------filter((d.d_month_seq <= 1219) and (d.d_month_seq >= 1208)) ---------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] apply RFs: RF3 +----------------------------filter((d.d_month_seq <= 1219) and (d.d_month_seq >= 1208)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter((d.d_month_seq <= 1207) and (d.d_month_seq >= 1196)) +----------------------------PhysicalOlapScan[date_dim] --------------------PhysicalDistribute ----------------------PhysicalProject -------------------------PhysicalOlapScan[store] +------------------------PhysicalOlapScan[store] apply RFs: RF4 ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((d.d_month_seq <= 1207) and (d.d_month_seq >= 1196)) -----------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out index 107f8f94a90..a7f921c597b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out @@ -11,12 +11,15 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() --------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] -----------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] +------------------------PhysicalDistribute +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 ------------------------PhysicalDistribute --------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] ----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 RF3 +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF3 ------------------------------PhysicalDistribute --------------------------------PhysicalProject ----------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) @@ -25,9 +28,6 @@ PhysicalResultSink ------------------------------PhysicalProject --------------------------------filter((customer_address.ca_state = 'OK')) ----------------------------------PhysicalOlapScan[customer_address] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[web_sales] ----------------------PhysicalDistribute ------------------------PhysicalProject --------------------------filter((web_site.web_company_name = 'pri')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out index 47884ed767d..41041719b24 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out @@ -6,10 +6,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number] --------PhysicalDistribute ----------PhysicalProject -------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF6 +------------PhysicalOlapScan[web_sales] apply RFs: RF0 --------PhysicalDistribute ----------PhysicalProject -------------PhysicalOlapScan[web_sales] apply RFs: RF6 +------------PhysicalOlapScan[web_sales] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalTopN[LOCAL_SORT] @@ -19,14 +19,25 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------hashAgg[GLOBAL] ----------------hashAgg[LOCAL] ------------------PhysicalProject ---------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF5 web_site_sk->[ws_web_site_sk] -------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF4 ws_order_number->[ws_order_number] ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] +--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF5 ws_order_number->[wr_order_number] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF4 wr_order_number->[ws_order_number] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_returns] apply RFs: RF5 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[ws_ship_addr_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 RF4 RF5 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 RF2 RF3 --------------------------------PhysicalDistribute ----------------------------------PhysicalProject ------------------------------------filter((date_dim.d_date <= '1999-04-02') and (date_dim.d_date >= '1999-02-01')) @@ -35,19 +46,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------filter((customer_address.ca_state = 'NC')) ------------------------------------PhysicalOlapScan[customer_address] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter((web_site.web_company_name = 'pri')) -------------------------------PhysicalOlapScan[web_site] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF1 wr_order_number->[ws_order_number];RF6 wr_order_number->[ws_order_number,ws_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_returns] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((web_site.web_company_name = 'pri')) +----------------------------------PhysicalOlapScan[web_site] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out index b248bc83d64..d744f431889 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out @@ -8,27 +8,27 @@ PhysicalResultSink ----------PhysicalDistribute ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF3 n_nationkey->[s_nationkey] +----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF2 s_suppkey->[l_suppkey] +--------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[l_suppkey] ----------------------PhysicalDistribute -------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] ---------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] +------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF2 o_orderkey->[l_orderkey] +--------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF1 l_orderkey->[l_orderkey] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 -----------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) -------------------------------PhysicalProject ---------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) -----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 +----------------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] ------------------------------PhysicalProject --------------------------------filter((l3.l_receiptdate > l3.l_commitdate)) -----------------------------------PhysicalOlapScan[lineitem] +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF2 RF3 --------------------------PhysicalProject ----------------------------filter((orders.o_orderstatus = 'F')) ------------------------------PhysicalOlapScan[orders] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[supplier] apply RFs: RF3 +--------------------------PhysicalOlapScan[supplier] apply RFs: RF4 ------------------PhysicalDistribute --------------------PhysicalProject ----------------------filter((nation.n_name = 'SAUDI ARABIA')) diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out index b248bc83d64..d744f431889 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out @@ -8,27 +8,27 @@ PhysicalResultSink ----------PhysicalDistribute ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF3 n_nationkey->[s_nationkey] +----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF2 s_suppkey->[l_suppkey] +--------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[l_suppkey] ----------------------PhysicalDistribute -------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] ---------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] +------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF2 o_orderkey->[l_orderkey] +--------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF1 l_orderkey->[l_orderkey] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 -----------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) -------------------------------PhysicalProject ---------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) -----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 +----------------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] ------------------------------PhysicalProject --------------------------------filter((l3.l_receiptdate > l3.l_commitdate)) -----------------------------------PhysicalOlapScan[lineitem] +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF2 RF3 --------------------------PhysicalProject ----------------------------filter((orders.o_orderstatus = 'F')) ------------------------------PhysicalOlapScan[orders] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[supplier] apply RFs: RF3 +--------------------------PhysicalOlapScan[supplier] apply RFs: RF4 ------------------PhysicalDistribute --------------------PhysicalProject ----------------------filter((nation.n_name = 'SAUDI ARABIA')) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org