This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 3ebe05ea90b branch-4.1 [opt](nereids) adjust Left join cost
factor:probeShortcutFactor #60183 (#61475)
3ebe05ea90b is described below
commit 3ebe05ea90b0d902fb77094ba1844470e323f2f9
Author: minghong <[email protected]>
AuthorDate: Thu Mar 19 10:49:03 2026 +0800
branch-4.1 [opt](nereids) adjust Left join cost factor:probeShortcutFactor
#60183 (#61475)
### What problem does this PR solve?
Cherry-pick of #60183 to branch-4.1.
Issue Number: close #xxx
Related PR: #xxx
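Problem Summary: In CostModel, the condition guarding the probeShortcutFactor
adjustment for left semi/anti hash joins (with no other join conjuncts and no
unknown column stats) now additionally requires rightRowCount < 10 * leftRowCount.
The affected shape_check expected plans are updated, and a probeShortcutFactor
check_point regression case is added.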
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here, e.g.
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../org/apache/doris/nereids/cost/CostModel.java | 3 +-
.../data/shape_check/clickbench/query10.out | 6 +-
.../tpcds_sf100/constraints/query23.out | 12 +-
.../shape_check/tpcds_sf100/rf_prune/query14.out | 156 +++++++++++----------
.../shape_check/tpcds_sf100/rf_prune/query33.out | 34 ++---
.../shape_check/tpcds_sf100/rf_prune/query35.out | 38 ++---
.../data/shape_check/tpcds_sf100/shape/query14.out | 156 +++++++++++----------
.../data/shape_check/tpcds_sf100/shape/query33.out | 26 ++--
.../data/shape_check/tpcds_sf100/shape/query35.out | 42 +++---
.../data/shape_check/tpcds_sf1000/hint/query69.out | 34 ++---
.../shape_check/tpcds_sf1000/shape/query33.out | 26 ++--
.../shape_check/tpcds_sf1000/shape/query35.out | 44 +++---
.../shape_check/tpcds_sf1000/shape/query69.out | 34 ++---
.../check_point/probeShortcutFactor.out | 10 ++
.../data/shape_check/tpch_sf1000/rf_prune/q22.out | 20 +--
.../data/shape_check/tpch_sf1000/shape/q22.out | 20 +--
.../check_point/probeShortcutFactor.groovy | 44 ++++++
17 files changed, 390 insertions(+), 315 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
index b113a3d1cc6..e942e71d064 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
@@ -453,7 +453,8 @@ class CostModel extends PlanVisitor<Cost, PlanContext> {
);
}
double probeShortcutFactor = 1.0;
- if (ConnectContext.get() != null &&
ConnectContext.get().getStatementContext() != null
+ if (rightRowCount < 10 * leftRowCount
+ && ConnectContext.get() != null &&
ConnectContext.get().getStatementContext() != null
&&
!ConnectContext.get().getStatementContext().isHasUnknownColStats()
&& physicalHashJoin.getJoinType().isLeftSemiOrAntiJoin()
&& physicalHashJoin.getOtherJoinConjuncts().isEmpty()
diff --git a/regression-test/data/shape_check/clickbench/query10.out
b/regression-test/data/shape_check/clickbench/query10.out
index ae9174ce1c1..36122c8cbf0 100644
--- a/regression-test/data/shape_check/clickbench/query10.out
+++ b/regression-test/data/shape_check/clickbench/query10.out
@@ -5,9 +5,9 @@ PhysicalResultSink
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute[DistributionSpecHash]
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
+----------hashAgg[GLOBAL]
+------------PhysicalDistribute[DistributionSpecHash]
+--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------PhysicalOlapScan[hits]
diff --git
a/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
b/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
index 9aac34d3e02..17c6c0e8b7a 100644
--- a/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
+++ b/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
@@ -53,29 +53,29 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------hashAgg[LOCAL]
----------------PhysicalUnion
------------------PhysicalProject
---------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk))
otherCondition=() build RFs:RF5 item_sk->[cs_item_sk]
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk))
otherCondition=() build RFs:RF5 cs_item_sk->[item_sk]
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
----------------------PhysicalProject
------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((catalog_sales.cs_bill_customer_sk =
best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF4
c_customer_sk->[cs_bill_customer_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
RF4 RF5
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
RF4
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 5) and
(date_dim.d_year = 2000))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
---------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk))
otherCondition=() build RFs:RF8 item_sk->[ws_item_sk]
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk))
otherCondition=() build RFs:RF8 ws_item_sk->[item_sk]
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8
----------------------PhysicalProject
------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((web_sales.ws_bill_customer_sk =
best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF7
c_customer_sk->[ws_bill_customer_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7
RF8
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 5) and
(date_dim.d_year = 2000))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
index 32fdf4de9c3..2a369f53078 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
@@ -69,80 +69,90 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------PhysicalProject
------------------filter((date_dim.d_year <= 2002) and (date_dim.d_year >=
2000))
--------------------PhysicalOlapScan[date_dim]
-----PhysicalResultSink
-------PhysicalTopN[MERGE_SORT]
---------PhysicalDistribute[DistributionSpecGather]
-----------PhysicalTopN[LOCAL_SORT]
-------------PhysicalProject
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalRepeat
-----------------------PhysicalUnion
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+----PhysicalCteAnchor ( cteId=CTEId#4 )
+------PhysicalCteProducer ( cteId=CTEId#4 )
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF10
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF10
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[catalog_sales]
apply RFs: RF13
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF13
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales]
apply RFs: RF16
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[web_sales] apply RFs:
RF16
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------PhysicalResultSink
+--------PhysicalTopN[MERGE_SORT]
+----------PhysicalDistribute[DistributionSpecGather]
+------------PhysicalTopN[LOCAL_SORT]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute[DistributionSpecHash]
+----------------------hashAgg[LOCAL]
+------------------------PhysicalRepeat
+--------------------------PhysicalDistribute[DistributionSpecExecutionAny]
+----------------------------PhysicalCteConsumer ( cteId=CTEId#4 )
+----------------PhysicalDistribute[DistributionSpecExecutionAny]
+------------------PhysicalCteConsumer ( cteId=CTEId#4 )
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
index 514fc6cf682..a09b5b4ae49 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
@@ -9,18 +9,21 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Home'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
--------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF0 RF1 RF2
+----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF0 RF1
--------------------------------------PhysicalProject
----------------------------------------filter((date_dim.d_moy = 1) and
(date_dim.d_year = 2002))
------------------------------------------PhysicalOlapScan[date_dim]
@@ -28,23 +31,23 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF6 i_item_sk->[cs_item_sk]
+----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_bill_addr_sk =
customer_address.ca_address_sk)) otherCondition=() build RFs:RF5
ca_address_sk->[cs_bill_addr_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
--------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF4 RF5 RF6
+----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF4 RF5
--------------------------------------PhysicalProject
----------------------------------------filter((date_dim.d_moy = 1) and
(date_dim.d_year = 2002))
------------------------------------------PhysicalOlapScan[date_dim]
@@ -52,19 +55,19 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF10 ws_item_sk->[i_item_sk]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF11
+--------------------------------PhysicalOlapScan[item] apply RFs: RF10
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF9 ca_address_sk->[ws_bill_addr_sk]
----------------------------------PhysicalProject
@@ -77,7 +80,4 @@ PhysicalResultSink
----------------------------------PhysicalProject
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
index dea7b62c380..243129c49d4 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
2001))
+------------------------------PhysicalOlapScan[date_dim]
----------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
-------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF5 ss_customer_sk->[c_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=()
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 2001))
+----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=()
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF5
+------------------------------------PhysicalOlapScan[customer]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[customer_demographics]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------------------PhysicalProject
---------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 2001))
-----------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
----------------------------PhysicalProject
------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year
= 2001))
--------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
2001))
-------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
b/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
index 9655664e700..517eb87bea6 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
@@ -69,80 +69,90 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------PhysicalProject
------------------filter((date_dim.d_year <= 2002) and (date_dim.d_year >=
2000))
--------------------PhysicalOlapScan[date_dim]
-----PhysicalResultSink
-------PhysicalTopN[MERGE_SORT]
---------PhysicalDistribute[DistributionSpecGather]
-----------PhysicalTopN[LOCAL_SORT]
-------------PhysicalProject
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalRepeat
-----------------------PhysicalUnion
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+----PhysicalCteAnchor ( cteId=CTEId#4 )
+------PhysicalCteProducer ( cteId=CTEId#4 )
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF10 RF11 RF12
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0
) apply RFs: RF12
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF10 RF11 RF12
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF12
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[catalog_sales]
apply RFs: RF13 RF14 RF15
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0
) apply RFs: RF15
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF13 RF14 RF15
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF15
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) >
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales]
apply RFs: RF16 RF17 RF18
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11)
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0
) apply RFs: RF18
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[web_sales] apply RFs:
RF16 RF17 RF18
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF18
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------PhysicalResultSink
+--------PhysicalTopN[MERGE_SORT]
+----------PhysicalDistribute[DistributionSpecGather]
+------------PhysicalTopN[LOCAL_SORT]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute[DistributionSpecHash]
+----------------------hashAgg[LOCAL]
+------------------------PhysicalRepeat
+--------------------------PhysicalDistribute[DistributionSpecExecutionAny]
+----------------------------PhysicalCteConsumer ( cteId=CTEId#4 )
+----------------PhysicalDistribute[DistributionSpecExecutionAny]
+------------------PhysicalCteConsumer ( cteId=CTEId#4 )
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
b/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
index 514fc6cf682..f5b2b8cb633 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
@@ -9,7 +9,10 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Home'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
@@ -28,12 +31,12 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
@@ -52,19 +55,19 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF10 ws_item_sk->[i_item_sk]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF11
+--------------------------------PhysicalOlapScan[item] apply RFs: RF10
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF9 ca_address_sk->[ws_bill_addr_sk]
----------------------------------PhysicalProject
@@ -77,7 +80,4 @@ PhysicalResultSink
----------------------------------PhysicalProject
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
b/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
index 9728d7d3070..1f9a0fc6a43 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
2001))
+------------------------------PhysicalOlapScan[date_dim]
----------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
-------------------------hashJoin[LEFT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF5 ss_customer_sk->[c_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3
RF4
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 2001))
+----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+----------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF2 cd_demo_sk->[c_current_cdemo_sk]
------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF3 ca_address_sk->[c_current_addr_sk]
+--------------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF3
RF4 RF5
+------------------------------------PhysicalOlapScan[customer] apply RFs: RF1
RF2
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[customer_demographics]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------------------PhysicalProject
---------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 2001))
-----------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
----------------------------PhysicalProject
------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year
= 2001))
--------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
2001))
-------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
b/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
index 31101f12eab..23e2e0cf0e4 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
@@ -9,39 +9,39 @@ PhysicalResultSink
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
----------------PhysicalProject
-------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk]
--------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk]
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6
+--------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7
------------------------PhysicalProject
--------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >=
1) and (date_dim.d_year = 2002))
----------------------------PhysicalOlapScan[date_dim]
---------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=() build RFs:RF4 c_customer_sk->[cs_ship_customer_sk]
+--------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=() build RFs:RF5 c_customer_sk->[cs_ship_customer_sk]
----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
--------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5
--------------------------PhysicalProject
----------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy
>= 1) and (date_dim.d_year = 2002))
------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF2 c_current_cdemo_sk->[cd_demo_sk]
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF3 c_current_cdemo_sk->[cd_demo_sk]
--------------------------PhysicalProject
-----------------------------PhysicalOlapScan[customer_demographics] apply RFs:
RF2
---------------------------hashJoin[LEFT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
+----------------------------PhysicalOlapScan[customer_demographics] apply RFs:
RF3
+--------------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer] apply RFs: RF1
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
RF2
--------------------------------PhysicalProject
-----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
-------------------------------------PhysicalOlapScan[customer_address]
+----------------------------------filter((date_dim.d_moy <= 3) and
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
+------------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------------------------PhysicalOlapScan[customer] apply RFs: RF0
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_moy <= 3) and
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
+------------------------------------PhysicalOlapScan[customer_address]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
b/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
index 1f4f083cdcf..f1cd1e4c777 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
@@ -9,7 +9,10 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Books'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
@@ -28,12 +31,12 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
@@ -52,12 +55,12 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
--------------------PhysicalProject
----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=()
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
@@ -76,8 +79,5 @@ PhysicalResultSink
------------------------------------filter((customer_address.ca_gmt_offset =
-5.00))
--------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF11
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
b/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
index 2f4be8c2912..fc317d15ce7 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=()
----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF5 cd_demo_sk->[c_current_cdemo_sk]
---------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
-----------------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
1999))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+--------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
+----------------------------PhysicalProject
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 1999))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------hashJoin[RIGHT_SEMI_JOIN shuffle]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF2 c_customer_sk->[ss_customer_sk]
------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+--------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_sales] apply RFs:
RF3 RF4
+------------------------------------PhysicalOlapScan[store_sales] apply RFs:
RF1 RF2
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 1999))
--------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF2 ca_address_sk->[c_current_addr_sk]
+--------------------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF2
RF5
+------------------------------------PhysicalOlapScan[customer] apply RFs: RF0
RF4
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_qoy < 4) and
(date_dim.d_year = 1999))
-------------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[customer_demographics]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year =
1999))
-------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
b/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
index 31101f12eab..23e2e0cf0e4 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
@@ -9,39 +9,39 @@ PhysicalResultSink
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
----------------PhysicalProject
-------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket]
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk))
otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk]
--------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk]
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6
+--------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7
------------------------PhysicalProject
--------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >=
1) and (date_dim.d_year = 2002))
----------------------------PhysicalOlapScan[date_dim]
---------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=() build RFs:RF4 c_customer_sk->[cs_ship_customer_sk]
+--------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk))
otherCondition=() build RFs:RF5 c_customer_sk->[cs_ship_customer_sk]
----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
--------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5
--------------------------PhysicalProject
----------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy
>= 1) and (date_dim.d_year = 2002))
------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF2 c_current_cdemo_sk->[cd_demo_sk]
+------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk))
otherCondition=() build RFs:RF3 c_current_cdemo_sk->[cd_demo_sk]
--------------------------PhysicalProject
-----------------------------PhysicalOlapScan[customer_demographics] apply RFs:
RF2
---------------------------hashJoin[LEFT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=()
+----------------------------PhysicalOlapScan[customer_demographics] apply RFs:
RF3
+--------------------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk))
otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer] apply RFs: RF1
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
RF2
--------------------------------PhysicalProject
-----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
-------------------------------------PhysicalOlapScan[customer_address]
+----------------------------------filter((date_dim.d_moy <= 3) and
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
+------------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------------------------PhysicalOlapScan[customer] apply RFs: RF0
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_moy <= 3) and
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
+------------------------------------PhysicalOlapScan[customer_address]
diff --git
a/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
b/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
new file mode 100644
index 00000000000..c11e94e4777
--- /dev/null
+++
b/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+PhysicalResultSink
+--hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((orders.o_custkey =
customer.c_custkey)) otherCondition=() build RFs:RF0 c_custkey->[o_custkey]
+----PhysicalProject
+------PhysicalOlapScan[orders] apply RFs: RF0
+----PhysicalProject
+------filter(substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30',
'31'))
+--------PhysicalOlapScan[customer]
+
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
index 69a80f708f9..5f75b319bf0 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
@@ -8,18 +8,18 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
-----------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4))
> avg(c_acctbal))
+----------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=() build
RFs:RF0 c_custkey->[o_custkey]
------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle]
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=()
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38,
4)) > avg(c_acctbal))
----------------------PhysicalProject
------------------------filter(substring(c_phone, 1, 2) IN ('13', '17', '18',
'23', '29', '30', '31'))
--------------------------PhysicalOlapScan[customer]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecGather]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------filter((customer.c_acctbal > 0.00) and
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
-----------------------------PhysicalOlapScan[customer]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecGather]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------filter((customer.c_acctbal > 0.00) and
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
+--------------------------------PhysicalOlapScan[customer]
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
b/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
index 69a80f708f9..5f75b319bf0 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
@@ -8,18 +8,18 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
-----------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4))
> avg(c_acctbal))
+----------------hashJoin[RIGHT_ANTI_JOIN shuffle]
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=() build
RFs:RF0 c_custkey->[o_custkey]
------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle]
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=()
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38,
4)) > avg(c_acctbal))
----------------------PhysicalProject
------------------------filter(substring(c_phone, 1, 2) IN ('13', '17', '18',
'23', '29', '30', '31'))
--------------------------PhysicalOlapScan[customer]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecGather]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------filter((customer.c_acctbal > 0.00) and
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
-----------------------------PhysicalOlapScan[customer]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecGather]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------filter((customer.c_acctbal > 0.00) and
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
+--------------------------------PhysicalOlapScan[customer]
diff --git
a/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
b/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
new file mode 100644
index 00000000000..5f140c93bb3
--- /dev/null
+++
b/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("probeShortcutFactor") {
+ String db = context.config.getDbNameByFile(new File(context.file.parent))
+ if (isCloudMode()) {
+ return
+ }
+ sql "use ${db}"
+ sql 'set enable_nereids_planner=true'
+ sql 'set enable_nereids_distribute_planner=false'
+ sql 'set enable_fallback_to_original_planner=false'
+ sql 'set exec_mem_limit=21G'
+ sql 'SET enable_pipeline_engine = true'
+ sql 'set parallel_pipeline_task_num=8'
+ sql 'set be_number_for_test=3'
+ sql "set runtime_filter_type=8"
+ sql 'set enable_runtime_filter_prune=false'
+ sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
+
+ // should use right anti join, not left anti join
+ qt_select """
+ explain shape plan
+ select c_custkey, c_phone, c_acctbal
+ from customer left anti join orders on o_custkey=c_custkey
+ where substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18',
'17') ;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]