This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 3ebe05ea90b branch-4.1 [opt](nereids) adjust Left join cost 
factor:probeShortcutFactor #60183 (#61475)
3ebe05ea90b is described below

commit 3ebe05ea90b0d902fb77094ba1844470e323f2f9
Author: minghong <[email protected]>
AuthorDate: Thu Mar 19 10:49:03 2026 +0800

    branch-4.1 [opt](nereids) adjust Left join cost factor:probeShortcutFactor 
#60183 (#61475)
    
    ### What problem does this PR solve?
    pick #60183
    Issue Number: close #xxx
    
    Related PR: #xxx
    
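    Problem Summary: in CostModel, the probeShortcutFactor condition for left
    semi/anti hash joins now additionally requires
    rightRowCount < 10 * leftRowCount; the shape_check expected plans are
    updated accordingly.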
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here, e.g.
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 .../org/apache/doris/nereids/cost/CostModel.java   |   3 +-
 .../data/shape_check/clickbench/query10.out        |   6 +-
 .../tpcds_sf100/constraints/query23.out            |  12 +-
 .../shape_check/tpcds_sf100/rf_prune/query14.out   | 156 +++++++++++----------
 .../shape_check/tpcds_sf100/rf_prune/query33.out   |  34 ++---
 .../shape_check/tpcds_sf100/rf_prune/query35.out   |  38 ++---
 .../data/shape_check/tpcds_sf100/shape/query14.out | 156 +++++++++++----------
 .../data/shape_check/tpcds_sf100/shape/query33.out |  26 ++--
 .../data/shape_check/tpcds_sf100/shape/query35.out |  42 +++---
 .../data/shape_check/tpcds_sf1000/hint/query69.out |  34 ++---
 .../shape_check/tpcds_sf1000/shape/query33.out     |  26 ++--
 .../shape_check/tpcds_sf1000/shape/query35.out     |  44 +++---
 .../shape_check/tpcds_sf1000/shape/query69.out     |  34 ++---
 .../check_point/probeShortcutFactor.out            |  10 ++
 .../data/shape_check/tpch_sf1000/rf_prune/q22.out  |  20 +--
 .../data/shape_check/tpch_sf1000/shape/q22.out     |  20 +--
 .../check_point/probeShortcutFactor.groovy         |  44 ++++++
 17 files changed, 390 insertions(+), 315 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
index b113a3d1cc6..e942e71d064 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModel.java
@@ -453,7 +453,8 @@ class CostModel extends PlanVisitor<Cost, PlanContext> {
             );
         }
         double probeShortcutFactor = 1.0;
-        if (ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null
+        if (rightRowCount < 10 * leftRowCount
+                && ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null
                 && 
!ConnectContext.get().getStatementContext().isHasUnknownColStats()
                 && physicalHashJoin.getJoinType().isLeftSemiOrAntiJoin()
                 && physicalHashJoin.getOtherJoinConjuncts().isEmpty()
diff --git a/regression-test/data/shape_check/clickbench/query10.out 
b/regression-test/data/shape_check/clickbench/query10.out
index ae9174ce1c1..36122c8cbf0 100644
--- a/regression-test/data/shape_check/clickbench/query10.out
+++ b/regression-test/data/shape_check/clickbench/query10.out
@@ -5,9 +5,9 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecGather]
 ------PhysicalTopN[LOCAL_SORT]
 --------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute[DistributionSpecHash]
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
+----------hashAgg[GLOBAL]
+------------PhysicalDistribute[DistributionSpecHash]
+--------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[hits]
 
diff --git 
a/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out 
b/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
index 9aac34d3e02..17c6c0e8b7a 100644
--- a/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
+++ b/regression-test/data/shape_check/tpcds_sf100/constraints/query23.out
@@ -53,29 +53,29 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------hashAgg[LOCAL]
 ----------------PhysicalUnion
 ------------------PhysicalProject
---------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) 
otherCondition=() build RFs:RF5 item_sk->[cs_item_sk]
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) 
otherCondition=() build RFs:RF5 cs_item_sk->[item_sk]
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
 ----------------------PhysicalProject
 ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((catalog_sales.cs_bill_customer_sk = 
best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF4 
c_customer_sk->[cs_bill_customer_sk]
 --------------------------PhysicalProject
 ----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 
RF4 RF5
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 
RF4
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_moy = 5) and 
(date_dim.d_year = 2000))
 ----------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
 ------------------PhysicalProject
---------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) 
otherCondition=() build RFs:RF8 item_sk->[ws_item_sk]
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) 
otherCondition=() build RFs:RF8 ws_item_sk->[item_sk]
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8
 ----------------------PhysicalProject
 ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((web_sales.ws_bill_customer_sk = 
best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF7 
c_customer_sk->[ws_bill_customer_sk]
 --------------------------PhysicalProject
 ----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 
RF8
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_moy = 5) and 
(date_dim.d_year = 2000))
 ----------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
index 32fdf4de9c3..2a369f53078 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query14.out
@@ -69,80 +69,90 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ----------------PhysicalProject
 ------------------filter((date_dim.d_year <= 2002) and (date_dim.d_year >= 
2000))
 --------------------PhysicalOlapScan[date_dim]
-----PhysicalResultSink
-------PhysicalTopN[MERGE_SORT]
---------PhysicalDistribute[DistributionSpecGather]
-----------PhysicalTopN[LOCAL_SORT]
-------------PhysicalProject
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalRepeat
-----------------------PhysicalUnion
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+----PhysicalCteAnchor ( cteId=CTEId#4 )
+------PhysicalCteProducer ( cteId=CTEId#4 )
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF10
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store_sales] apply 
RFs: RF10
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF13
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[catalog_sales] apply 
RFs: RF13
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF16
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=()
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[web_sales] apply RFs: 
RF16
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------PhysicalResultSink
+--------PhysicalTopN[MERGE_SORT]
+----------PhysicalDistribute[DistributionSpecGather]
+------------PhysicalTopN[LOCAL_SORT]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute[DistributionSpecHash]
+----------------------hashAgg[LOCAL]
+------------------------PhysicalRepeat
+--------------------------PhysicalDistribute[DistributionSpecExecutionAny]
+----------------------------PhysicalCteConsumer ( cteId=CTEId#4 )
+----------------PhysicalDistribute[DistributionSpecExecutionAny]
+------------------PhysicalCteConsumer ( cteId=CTEId#4 )
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
index 514fc6cf682..a09b5b4ae49 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query33.out
@@ -9,18 +9,21 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalUnion
 ----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Home'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk]
 ----------------------------------PhysicalProject
 ------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
 --------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[store_sales] apply 
RFs: RF0 RF1 RF2
+----------------------------------------PhysicalOlapScan[store_sales] apply 
RFs: RF0 RF1
 --------------------------------------PhysicalProject
 ----------------------------------------filter((date_dim.d_moy = 1) and 
(date_dim.d_year = 2002))
 ------------------------------------------PhysicalOlapScan[date_dim]
@@ -28,23 +31,23 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF6 i_item_sk->[cs_item_sk]
+----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_bill_addr_sk = 
customer_address.ca_address_sk)) otherCondition=() build RFs:RF5 
ca_address_sk->[cs_bill_addr_sk]
 ----------------------------------PhysicalProject
 ------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
 --------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[catalog_sales] apply 
RFs: RF4 RF5 RF6
+----------------------------------------PhysicalOlapScan[catalog_sales] apply 
RFs: RF4 RF5
 --------------------------------------PhysicalProject
 ----------------------------------------filter((date_dim.d_moy = 1) and 
(date_dim.d_year = 2002))
 ------------------------------------------PhysicalOlapScan[date_dim]
@@ -52,19 +55,19 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
 ----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF10 ws_item_sk->[i_item_sk]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF11
+--------------------------------PhysicalOlapScan[item] apply RFs: RF10
 ------------------------------PhysicalProject
 --------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF9 ca_address_sk->[ws_bill_addr_sk]
 ----------------------------------PhysicalProject
@@ -77,7 +80,4 @@ PhysicalResultSink
 ----------------------------------PhysicalProject
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
index dea7b62c380..243129c49d4 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
2001))
+------------------------------PhysicalOlapScan[date_dim]
 ----------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
-------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF5 ss_customer_sk->[c_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=()
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 2001))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalProject
 ----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=()
 ------------------------------PhysicalProject
 --------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=()
 ----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF5
+------------------------------------PhysicalOlapScan[customer]
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
 --------------------------------PhysicalOlapScan[customer_demographics]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------------------PhysicalProject
---------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 2001))
-----------------------------------PhysicalOlapScan[date_dim]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
 ----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
 ----------------------------PhysicalProject
 ------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year 
= 2001))
 --------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
2001))
-------------------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query14.out 
b/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
index 9655664e700..517eb87bea6 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query14.out
@@ -69,80 +69,90 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ----------------PhysicalProject
 ------------------filter((date_dim.d_year <= 2002) and (date_dim.d_year >= 
2000))
 --------------------PhysicalOlapScan[date_dim]
-----PhysicalResultSink
-------PhysicalTopN[MERGE_SORT]
---------PhysicalDistribute[DistributionSpecGather]
-----------PhysicalTopN[LOCAL_SORT]
-------------PhysicalProject
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute[DistributionSpecHash]
-------------------hashAgg[LOCAL]
---------------------PhysicalRepeat
-----------------------PhysicalUnion
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+----PhysicalCteAnchor ( cteId=CTEId#4 )
+------PhysicalCteProducer ( cteId=CTEId#4 )
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF10 RF11 RF12
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 
) apply RFs: RF12
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store_sales] apply 
RFs: RF10 RF11 RF12
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply 
RFs: RF12
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[catalog_sales]
 apply RFs: RF13 RF14 RF15
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 
) apply RFs: RF15
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
-------------------------PhysicalProject
---------------------------NestedLoopJoin[INNER_JOIN](cast(sales as 
DECIMALV3(38, 4)) > avg_sales.average_sales)
+------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[catalog_sales] apply 
RFs: RF13 RF14 RF15
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply 
RFs: RF15
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+----------------PhysicalProject
+------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > 
avg_sales.average_sales)
+--------------------PhysicalProject
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
-------------------------------hashAgg[GLOBAL]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------hashAgg[LOCAL]
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
-----------------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF16 RF17 RF18
-----------------------------------------------PhysicalProject
-------------------------------------------------filter((date_dim.d_moy = 11) 
and (date_dim.d_year = 2002))
---------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 
) apply RFs: RF18
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[item]
-----------------------------PhysicalAssertNumRows
-------------------------------PhysicalDistribute[DistributionSpecGather]
---------------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------------------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
+--------------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) 
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[web_sales] apply RFs: 
RF16 RF17 RF18
+--------------------------------------PhysicalProject
+----------------------------------------filter((date_dim.d_moy = 11) and 
(date_dim.d_year = 2002))
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply 
RFs: RF18
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------PhysicalAssertNumRows
+----------------------PhysicalDistribute[DistributionSpecGather]
+------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
+------PhysicalResultSink
+--------PhysicalTopN[MERGE_SORT]
+----------PhysicalDistribute[DistributionSpecGather]
+------------PhysicalTopN[LOCAL_SORT]
+--------------PhysicalUnion
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute[DistributionSpecHash]
+----------------------hashAgg[LOCAL]
+------------------------PhysicalRepeat
+--------------------------PhysicalDistribute[DistributionSpecExecutionAny]
+----------------------------PhysicalCteConsumer ( cteId=CTEId#4 )
+----------------PhysicalDistribute[DistributionSpecExecutionAny]
+------------------PhysicalCteConsumer ( cteId=CTEId#4 )
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query33.out 
b/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
index 514fc6cf682..f5b2b8cb633 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query33.out
@@ -9,7 +9,10 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalUnion
 ----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Home'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
@@ -28,12 +31,12 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
@@ -52,19 +55,19 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
 ----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF10 ws_item_sk->[i_item_sk]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF11
+--------------------------------PhysicalOlapScan[item] apply RFs: RF10
 ------------------------------PhysicalProject
 --------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) 
otherCondition=() build RFs:RF9 ca_address_sk->[ws_bill_addr_sk]
 ----------------------------------PhysicalProject
@@ -77,7 +80,4 @@ PhysicalResultSink
 ----------------------------------PhysicalProject
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Home'))
-------------------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query35.out 
b/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
index 9728d7d3070..1f9a0fc6a43 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
2001))
+------------------------------PhysicalOlapScan[date_dim]
 ----------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
-------------------------hashJoin[LEFT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF5 ss_customer_sk->[c_customer_sk]
+------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 
RF4
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 2001))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+----------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF2 cd_demo_sk->[c_current_cdemo_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF3 ca_address_sk->[c_current_addr_sk]
+--------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
 ----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 
RF4 RF5
+------------------------------------PhysicalOlapScan[customer] apply RFs: RF1 
RF2
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
 --------------------------------PhysicalOlapScan[customer_demographics]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------------------PhysicalProject
---------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 2001))
-----------------------------------PhysicalOlapScan[date_dim]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
 ----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
 ----------------------------PhysicalProject
 ------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year 
= 2001))
 --------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
2001))
-------------------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out 
b/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
index 31101f12eab..23e2e0cf0e4 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query69.out
@@ -9,39 +9,39 @@ PhysicalResultSink
 ------------PhysicalDistribute[DistributionSpecHash]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk]
+----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6
+--------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7
 ------------------------PhysicalProject
 --------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >= 
1) and (date_dim.d_year = 2002))
 ----------------------------PhysicalOlapScan[date_dim]
---------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[cs_ship_customer_sk]
+--------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=() build RFs:RF5 c_customer_sk->[cs_ship_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy 
>= 1) and (date_dim.d_year = 2002))
 ------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF2 c_current_cdemo_sk->[cd_demo_sk]
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF3 c_current_cdemo_sk->[cd_demo_sk]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[customer_demographics] apply RFs: 
RF2
---------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
+----------------------------PhysicalOlapScan[customer_demographics] apply RFs: 
RF3
+--------------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer] apply RFs: RF1
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 
RF2
 --------------------------------PhysicalProject
-----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
-------------------------------------PhysicalOlapScan[customer_address]
+----------------------------------filter((date_dim.d_moy <= 3) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
+------------------------------------PhysicalOlapScan[date_dim]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------------------------PhysicalOlapScan[customer] apply RFs: RF0
 --------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_moy <= 3) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
+------------------------------------PhysicalOlapScan[customer_address]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out 
b/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
index 1f4f083cdcf..f1cd1e4c777 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query33.out
@@ -9,7 +9,10 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalUnion
 ----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF3 i_manufact_id->[i_manufact_id]
+--------------------PhysicalProject
+----------------------filter((item.i_category = 'Books'))
+------------------------PhysicalOlapScan[item] apply RFs: RF3
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
@@ -28,12 +31,12 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF3
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF7 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF7
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
@@ -52,12 +55,12 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF7
+--------------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
 --------------------PhysicalProject
 ----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
-----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() 
build RFs:RF11 i_manufact_id->[i_manufact_id]
+------------------------PhysicalOlapScan[item] apply RFs: RF11
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
@@ -76,8 +79,5 @@ PhysicalResultSink
 ------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00))
 --------------------------------------PhysicalOlapScan[customer_address]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[item] apply RFs: RF11
---------------------PhysicalProject
-----------------------filter((item.i_category = 'Books'))
-------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out 
b/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
index 2f4be8c2912..fc317d15ce7 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query35.out
@@ -10,38 +10,38 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
 ------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)])
---------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
+--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=()
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF5 cd_demo_sk->[c_current_cdemo_sk]
---------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
-----------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk]
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
+--------------------------PhysicalProject
+----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
1999))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
+--------------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
+----------------------------PhysicalProject
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF3
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 1999))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF2 c_customer_sk->[ss_customer_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+--------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 ----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_sales] apply RFs: 
RF3 RF4
+------------------------------------PhysicalOlapScan[store_sales] apply RFs: 
RF1 RF2
 ----------------------------------PhysicalProject
 ------------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 1999))
 --------------------------------------PhysicalOlapScan[date_dim]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF2 ca_address_sk->[c_current_addr_sk]
+--------------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
 ----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[customer] apply RFs: RF2 
RF5
+------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 
RF4
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[customer_address]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_qoy < 4) and 
(date_dim.d_year = 1999))
-------------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalProject
 ----------------------------PhysicalOlapScan[customer_demographics]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 
1999))
-------------------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out 
b/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
index 31101f12eab..23e2e0cf0e4 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query69.out
@@ -9,39 +9,39 @@ PhysicalResultSink
 ------------PhysicalDistribute[DistributionSpecHash]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk]
+------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] 
hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) 
otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk]
+----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6
+--------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7
 ------------------------PhysicalProject
 --------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >= 
1) and (date_dim.d_year = 2002))
 ----------------------------PhysicalOlapScan[date_dim]
---------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=() build RFs:RF4 c_customer_sk->[cs_ship_customer_sk]
+--------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) 
otherCondition=() build RFs:RF5 c_customer_sk->[cs_ship_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk]
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy 
>= 1) and (date_dim.d_year = 2002))
 ------------------------------PhysicalOlapScan[date_dim]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF2 c_current_cdemo_sk->[cd_demo_sk]
+------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) 
otherCondition=() build RFs:RF3 c_current_cdemo_sk->[cd_demo_sk]
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[customer_demographics] apply RFs: 
RF2
---------------------------hashJoin[LEFT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=()
+----------------------------PhysicalOlapScan[customer_demographics] apply RFs: 
RF3
+--------------------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) 
otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF1 ca_address_sk->[c_current_addr_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer] apply RFs: RF1
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 
RF2
 --------------------------------PhysicalProject
-----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
-------------------------------------PhysicalOlapScan[customer_address]
+----------------------------------filter((date_dim.d_moy <= 3) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
+------------------------------------PhysicalOlapScan[date_dim]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() 
build RFs:RF0 ca_address_sk->[c_current_addr_sk]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------------------------PhysicalOlapScan[customer] apply RFs: RF0
 --------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_moy <= 3) and 
(date_dim.d_moy >= 1) and (date_dim.d_year = 2002))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter(ca_state IN ('IL', 'ME', 'TX'))
+------------------------------------PhysicalOlapScan[customer_address]
 
diff --git 
a/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
 
b/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
new file mode 100644
index 00000000000..c11e94e4777
--- /dev/null
+++ 
b/regression-test/data/shape_check/tpch_sf1000/check_point/probeShortcutFactor.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+PhysicalResultSink
+--hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((orders.o_custkey = 
customer.c_custkey)) otherCondition=() build RFs:RF0 c_custkey->[o_custkey]
+----PhysicalProject
+------PhysicalOlapScan[orders] apply RFs: RF0
+----PhysicalProject
+------filter(substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', 
'31'))
+--------PhysicalOlapScan[customer]
+
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out 
b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
index 69a80f708f9..5f75b319bf0 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q22.out
@@ -8,18 +8,18 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) 
> avg(c_acctbal))
+----------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=() build 
RFs:RF0 c_custkey->[o_custkey]
 ------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] 
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=()
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 
4)) > avg(c_acctbal))
 ----------------------PhysicalProject
 ------------------------filter(substring(c_phone, 1, 2) IN ('13', '17', '18', 
'23', '29', '30', '31'))
 --------------------------PhysicalOlapScan[customer]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecGather]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------filter((customer.c_acctbal > 0.00) and 
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
-----------------------------PhysicalOlapScan[customer]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecGather]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------filter((customer.c_acctbal > 0.00) and 
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
+--------------------------------PhysicalOlapScan[customer]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q22.out 
b/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
index 69a80f708f9..5f75b319bf0 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q22.out
@@ -8,18 +8,18 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) 
> avg(c_acctbal))
+----------------hashJoin[RIGHT_ANTI_JOIN shuffle] 
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=() build 
RFs:RF0 c_custkey->[o_custkey]
 ------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] 
hashCondition=((orders.o_custkey = customer.c_custkey)) otherCondition=()
+--------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------PhysicalProject
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 
4)) > avg(c_acctbal))
 ----------------------PhysicalProject
 ------------------------filter(substring(c_phone, 1, 2) IN ('13', '17', '18', 
'23', '29', '30', '31'))
 --------------------------PhysicalOlapScan[customer]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecGather]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------filter((customer.c_acctbal > 0.00) and 
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
-----------------------------PhysicalOlapScan[customer]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecGather]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------filter((customer.c_acctbal > 0.00) and 
substring(c_phone, 1, 2) IN ('13', '17', '18', '23', '29', '30', '31'))
+--------------------------------PhysicalOlapScan[customer]
 
diff --git 
a/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
 
b/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
new file mode 100644
index 00000000000..5f140c93bb3
--- /dev/null
+++ 
b/regression-test/suites/shape_check/tpch_sf1000/check_point/probeShortcutFactor.groovy
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("probeShortcutFactor") {
+    String db = context.config.getDbNameByFile(new File(context.file.parent))
+    if (isCloudMode()) {
+        return
+    }
+    sql "use ${db}"
+    sql 'set enable_nereids_planner=true'
+    sql 'set enable_nereids_distribute_planner=false'
+    sql 'set enable_fallback_to_original_planner=false'
+    sql 'set exec_mem_limit=21G'
+    sql 'SET enable_pipeline_engine = true'
+    sql 'set parallel_pipeline_task_num=8'
+    sql 'set be_number_for_test=3'
+    sql "set runtime_filter_type=8"
+    sql 'set enable_runtime_filter_prune=false'
+    sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
+    
+    // should use right anti join, not left anti join
+    qt_select """
+    explain shape plan
+    select c_custkey, c_phone, c_acctbal
+    from customer left anti join orders on o_custkey=c_custkey
+    where substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', 
'17') ;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to