This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 9dc55f90ebb [opt](nereids) set lower bound for range-selectivity(2.1) (#41061)
9dc55f90ebb is described below

commit 9dc55f90ebb1bda9531275e5c40d403296f12f6b
Author: minghong <engle...@gmail.com>
AuthorDate: Sun Sep 22 07:32:22 2024 +0800

    [opt](nereids) set lower bound for range-selectivity(2.1) (#41061)
    
    ## Proposed changes
    pick #40089
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/stats/FilterEstimation.java      |  5 +++
 .../data/nereids_hint_tpcds_p0/shape/query12.out   | 17 ++++----
 .../data/nereids_hint_tpcds_p0/shape/query80.out   | 49 ++++++++++------------
 .../shape/query12.out                              | 15 ++++---
 .../shape/query80.out                              | 49 ++++++++++------------
 5 files changed, 66 insertions(+), 69 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 33b7e02b332..65db271e394 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -71,6 +71,9 @@ import java.util.function.Predicate;
  */
 public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationContext> {
     public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5;
+    // "Range selectivity is prone to producing outliers, so we add this threshold limit.
+    // The threshold estimation is calculated based on selecting one month out of fifty years."
+    public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016;
     public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0;
 
     public static final double DEFAULT_HAVING_COEFFICIENT = 0.01;
@@ -627,6 +630,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
                     : intersectRange.getDistinctValues() / leftRange.getDistinctValues();
             if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
                 sel = DEFAULT_INEQUALITY_COEFFICIENT;
+            } else if (sel < RANGE_SELECTIVITY_THRESHOLD) {
+                sel = RANGE_SELECTIVITY_THRESHOLD;
             }
             sel = getNotNullSelectivity(leftStats, sel);
             updatedStatistics = context.statistics.withSel(sel);
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out 
b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
index 40646f2dda1..03274a28fef 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out
@@ -13,16 +13,15 @@ PhysicalResultSink
 --------------------hashAgg[LOCAL]
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF1 i_item_sk->[ws_item_sk]
---------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+------------------------------PhysicalDistribute[DistributionSpecReplicated]
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 
RF1
---------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2001-07-15') 
and (date_dim.d_date >= '2001-06-15'))
---------------------------------------PhysicalOlapScan[date_dim]
---------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------filter((date_dim.d_date <= '2001-07-15') and 
(date_dim.d_date >= '2001-06-15'))
+------------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------PhysicalProject
 ------------------------------filter(i_category IN ('Books', 'Electronics', 
'Men'))
 --------------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out 
b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
index 88976f6717b..9981b31b55d 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out
@@ -16,34 +16,31 @@ PhysicalResultSink
 --------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
 ------------------------------hashJoin[RIGHT_OUTER_JOIN] 
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and 
(store_sales.ss_ticket_number = store_returns.sr_ticket_number)) 
otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 
ss_ticket_number->[sr_ticket_number]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns] apply RFs: 
RF4 RF5
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF3 s_store_sk->[ss_store_sk]
---------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
-----------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1 RF2 RF3
-------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------PhysicalProject
-----------------------------------------------------filter((date_dim.d_date <= 
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
-------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((promotion.p_channel_tv
 = 'N'))
-----------------------------------------------------PhysicalOlapScan[promotion]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price > 
50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns] apply RFs: 
RF4 RF5
+--------------------------------PhysicalProject
+----------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF3 s_store_sk->[ss_store_sk]
+------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk]
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
+------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF0 RF1 RF2 RF3
+--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_date <= 
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------PhysicalProject
+----------------------------------------------filter((promotion.p_channel_tv = 
'N'))
+------------------------------------------------PhysicalOlapScan[promotion]
 
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[store]
+------------------------------------------filter((item.i_current_price > 
50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store]
 --------------------PhysicalProject
 ----------------------hashAgg[GLOBAL]
 ------------------------PhysicalDistribute[DistributionSpecHash]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
index 03682c1c406..837bd33960d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out
@@ -13,15 +13,14 @@ PhysicalResultSink
 --------------------hashAgg[LOCAL]
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF1 i_item_sk->[ws_item_sk]
---------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+--------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+----------------------------PhysicalDistribute[DistributionSpecReplicated]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
-------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2001-07-15') and 
(date_dim.d_date >= '2001-06-15'))
-------------------------------------PhysicalOlapScan[date_dim]
---------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------filter((date_dim.d_date <= '2001-07-15') and 
(date_dim.d_date >= '2001-06-15'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------PhysicalProject
 ------------------------------filter(i_category IN ('Books', 'Electronics', 
'Men'))
 --------------------------------PhysicalOlapScan[item]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
index 88976f6717b..9981b31b55d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out
@@ -16,34 +16,31 @@ PhysicalResultSink
 --------------------------hashAgg[LOCAL]
 ----------------------------PhysicalProject
 ------------------------------hashJoin[RIGHT_OUTER_JOIN] 
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and 
(store_sales.ss_ticket_number = store_returns.sr_ticket_number)) 
otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 
ss_ticket_number->[sr_ticket_number]
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns] apply RFs: 
RF4 RF5
---------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF3 s_store_sk->[ss_store_sk]
---------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
-----------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------PhysicalProject
---------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1 RF2 RF3
-------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------PhysicalProject
-----------------------------------------------------filter((date_dim.d_date <= 
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
-------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((promotion.p_channel_tv
 = 'N'))
-----------------------------------------------------PhysicalOlapScan[promotion]
-----------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price > 
50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns] apply RFs: 
RF4 RF5
+--------------------------------PhysicalProject
+----------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF3 s_store_sk->[ss_store_sk]
+------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk]
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) 
otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk]
+------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[store_sales] 
apply RFs: RF0 RF1 RF2 RF3
+--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((date_dim.d_date <= 
'2002-09-13') and (date_dim.d_date >= '2002-08-14'))
+--------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------PhysicalProject
+----------------------------------------------filter((promotion.p_channel_tv = 
'N'))
+------------------------------------------------PhysicalOlapScan[promotion]
 
--------------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[store]
+------------------------------------------filter((item.i_current_price > 
50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------PhysicalProject
+----------------------------------------PhysicalOlapScan[store]
 --------------------PhysicalProject
 ----------------------hashAgg[GLOBAL]
 ------------------------PhysicalDistribute[DistributionSpecHash]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to