This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 883d84fb29b branch-3.1: [opt](nereids) optimize normalize window 
#54947 (#55046)
883d84fb29b is described below

commit 883d84fb29bc534ba2458c6485136f5ea8f0b7ab
Author: 924060929 <[email protected]>
AuthorDate: Thu Sep 4 09:49:51 2025 +0800

    branch-3.1: [opt](nereids) optimize normalize window #54947 (#55046)
    
    cherry pick #54947
---
 .../ExtractAndNormalizeWindowExpression.java       |  39 +++++++++++++
 .../rewrite/PushDownFilterThroughWindowTest.java   |  64 ++++++++++++++++++++-
 .../nereids_p0/cte/test_cte_filter_pushdown.out    | Bin 1353 -> 1352 bytes
 .../cte/test_cte_filter_pushdown.out               | Bin 1353 -> 1352 bytes
 .../tpch/push_filter_window_eqset.out              | Bin 419 -> 362 bytes
 .../tpcds_sf100/noStatsRfPrune/query51.out         | Bin 2673 -> 2637 bytes
 .../tpcds_sf100/no_stats_shape/query51.out         | Bin 2673 -> 2637 bytes
 .../shape_check/tpcds_sf100/rf_prune/query51.out   | Bin 2673 -> 2637 bytes
 .../data/shape_check/tpcds_sf100/shape/query51.out | Bin 2673 -> 2637 bytes
 .../data/shape_check/tpcds_sf1000/hint/query51.out | Bin 2792 -> 2756 bytes
 .../shape_check/tpcds_sf1000/shape/query51.out     | Bin 2673 -> 2637 bytes
 .../shape_check/tpcds_sf10t_orc/shape/query51.out  | Bin 2673 -> 2637 bytes
 12 files changed, 102 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
index 02165709564..e96cf3e105c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
@@ -180,6 +180,45 @@ public class ExtractAndNormalizeWindowExpression extends 
OneRewriteRuleFactory i
                             inputSlots.stream()
                     ).distinct();
                 }
+
+                // for this sql:
+                //   select
+                //     SUBSTR(orderdate,1,10) AS dt,
+                //     ROW_NUMBER() OVER(PARTITION BY orderdate ORDER BY 
orderid DESC) AS rn
+                //   from lineorders
+                //   having dt = '2025-01-01'
+                //
+                // we not push down the `dt` slot under LogicalWindow, but 
push down [orderdate, orderid]
+                // to the bottom projects, because if we push down `dt`, the 
plan tree will be:
+                //
+                //             LogicalFilter(substr(dt#3, 1, 10) = 
'2025-01-01')
+                //                                     |
+                //      LogicalWindow(rowNumber(partition by orderdate#2, 
order by orderid#1))
+                //                                     |
+                //   LogicalProject(orderid#1, orderdate#2, 
substr(orderdate#1, 1, 10) as dt#3)
+                //
+                // and can not push down filter by 
`PushDownFilterThroughWindow`, causing inefficiency,
+                // because dt#3 in LogicalFilter not contains in the partition 
key in LogicalWindow: [orderdate#2].
+                //
+                // so we only push down orderdate in the LogicalFilter, not 
push down `dt`:
+                //
+                //      LogicalFilter(substr(orderdate#2, 1, 10) = 
'2025-01-01')
+                //                               |
+                //      LogicalWindow(rowNumber(partition by orderdate#2, 
order by orderid#1))
+                //                               |
+                //             LogicalProject(orderid#1, orderdate#2)
+                //
+                // and then, `PushDownFilterThroughWindow` found the 
LogicalFilter's `orderdate#2` contains
+                // in the LogicalWindow's partition key: [orderdate#2], and 
can push down filter to:
+                //
+                //   LogicalWindow(rowNumber(partition by orderdate#2, order 
by orderid#1))
+                //                               |
+                //             LogicalProject(orderid#1, orderdate#2)
+                //                              |
+                //     LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01')
+                if (expression instanceof Alias) {
+                    return expression.getInputSlots().stream();
+                }
                 return ImmutableList.of(expression).stream();
             })
             .collect(ImmutableSet.toImmutableSet());
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
index 6edcd01a8ea..3c367550cfe 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.rules.rewrite;
 
+import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
 import org.apache.doris.nereids.trees.expressions.Expression;
@@ -35,6 +36,7 @@ import org.apache.doris.nereids.util.MemoTestUtils;
 import org.apache.doris.nereids.util.PlanChecker;
 import org.apache.doris.nereids.util.PlanConstructor;
 import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.utframe.TestWithFeService;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
@@ -42,7 +44,7 @@ import org.junit.jupiter.api.Test;
 
 import java.util.List;
 
-class PushDownFilterThroughWindowTest implements MemoPatternMatchSupported {
+class PushDownFilterThroughWindowTest extends TestWithFeService implements 
MemoPatternMatchSupported {
     private final LogicalOlapScan scan = new 
LogicalOlapScan(StatementScopeIdGenerator.newRelationId(),
             PlanConstructor.student,
             ImmutableList.of(""));
@@ -89,4 +91,64 @@ class PushDownFilterThroughWindowTest implements 
MemoPatternMatchSupported {
                         )
                 );
     }
+
+    @Test
+    public void testPushDownFilter() throws Exception {
+        String db = "test";
+        createDatabase(db);
+        useDatabase(db);
+        createTable("CREATE TABLE lineorders (\n"
+                + "orderdate varchar(100) NOT NULL,\n"
+                + "orderid int NOT NULL,\n"
+                + "country_id int NOT NULL,\n"
+                + "vender_id int NOT NULL,\n"
+                + "ordernum int NOT NULL,\n"
+                + "ordemoney int NOT NULL\n"
+                + ") ENGINE=OLAP\n"
+                + "DUPLICATE KEY(orderdate, orderid, country_id)\n"
+                + "COMMENT 'OLAP'\n"
+                + "PARTITION BY LIST(orderdate)\n"
+                + "(PARTITION p1992 VALUES IN (\"0-2020\"),\n"
+                + "PARTITION p1993 VALUES IN (\"0-2021\"),\n"
+                + "PARTITION p1994 VALUES IN (\"0-2022\"),\n"
+                + "PARTITION p1995 VALUES IN (\"0-2023\"),\n"
+                + "PARTITION p1996 VALUES IN (\"0-2024\"),\n"
+                + "PARTITION p1997 VALUES IN (\"0-2025\"))\n"
+                + "DISTRIBUTED BY HASH(orderid) BUCKETS 48\n"
+                + "PROPERTIES (\n"
+                + "\"replication_allocation\" = \"tag.location.default: 1\"\n"
+                + ")");
+
+        connectContext.getSessionVariable()
+                .setDisableNereidsRules(
+                        RuleType.OLAP_SCAN_PARTITION_PRUNE.name() + "," + 
RuleType.PRUNE_EMPTY_PARTITION.name());
+
+        PlanChecker.from(connectContext)
+                .analyze("select * from ( \n"
+                        + "  select \n"
+                        + "    orderid,\n"
+                        + "    orderdate,\n"
+                        + "    country_id,\n"
+                        + "    ordernum,\n"
+                        + "    ordemoney,\n"
+                        + "    SUBSTR(lineorders.orderdate,3,4) AS dt,\n"
+                        + "    ROW_NUMBER() OVER(PARTITION BY 
lineorders.orderid,lineorders.orderdate ORDER BY lineorders.country_id DESC) AS 
rn\n"
+                        + "  from lineorders\n"
+                        + ") a \n"
+                        + "where SUBSTR(a.dt, 1, 4) = SUBSTR(curdate(), 1, 4)")
+                .rewrite()
+                .matchesFromRoot(
+                        logicalResultSink(
+                                logicalProject(
+                                        logicalWindow(
+                                                logicalProject(
+                                                        logicalFilter(
+                                                                
logicalOlapScan()
+                                                        )
+                                                )
+                                        )
+                                )
+                        )
+                );
+    }
 }
diff --git a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out 
b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out
index 0bbae0dc25f..edcbfcb6022 100644
Binary files a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out 
and b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out differ
diff --git 
a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out 
b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out
index 0bbae0dc25f..edcbfcb6022 100644
Binary files 
a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out and 
b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out differ
diff --git 
a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out 
b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out
index 0a14dd5f07c..bb38206fc48 100644
Binary files 
a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out and 
b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out differ
diff --git 
a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out 
b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files 
a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out and 
b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out differ
diff --git 
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out 
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files 
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out and 
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files 
a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out and 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query51.out 
b/regression-test/data/shape_check/tpcds_sf100/shape/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files a/regression-test/data/shape_check/tpcds_sf100/shape/query51.out 
and b/regression-test/data/shape_check/tpcds_sf100/shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out 
b/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out
index a10c39885ad..ad962e7d114 100644
Binary files a/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out 
and b/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out 
b/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out
index 6c22d2df308..98b43bfdfc1 100644
Binary files a/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out 
and b/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out 
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out
index 5635b5dbabf..5d049b5a6bb 100644
Binary files 
a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out and 
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to