This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 883d84fb29b branch-3.1: [opt](nereids) optimize normalize window
#54947 (#55046)
883d84fb29b is described below
commit 883d84fb29bc534ba2458c6485136f5ea8f0b7ab
Author: 924060929 <[email protected]>
AuthorDate: Thu Sep 4 09:49:51 2025 +0800
branch-3.1: [opt](nereids) optimize normalize window #54947 (#55046)
cherry pick #54947
---
.../ExtractAndNormalizeWindowExpression.java | 39 +++++++++++++
.../rewrite/PushDownFilterThroughWindowTest.java | 64 ++++++++++++++++++++-
.../nereids_p0/cte/test_cte_filter_pushdown.out | Bin 1353 -> 1352 bytes
.../cte/test_cte_filter_pushdown.out | Bin 1353 -> 1352 bytes
.../tpch/push_filter_window_eqset.out | Bin 419 -> 362 bytes
.../tpcds_sf100/noStatsRfPrune/query51.out | Bin 2673 -> 2637 bytes
.../tpcds_sf100/no_stats_shape/query51.out | Bin 2673 -> 2637 bytes
.../shape_check/tpcds_sf100/rf_prune/query51.out | Bin 2673 -> 2637 bytes
.../data/shape_check/tpcds_sf100/shape/query51.out | Bin 2673 -> 2637 bytes
.../data/shape_check/tpcds_sf1000/hint/query51.out | Bin 2792 -> 2756 bytes
.../shape_check/tpcds_sf1000/shape/query51.out | Bin 2673 -> 2637 bytes
.../shape_check/tpcds_sf10t_orc/shape/query51.out | Bin 2673 -> 2637 bytes
12 files changed, 102 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
index 02165709564..e96cf3e105c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java
@@ -180,6 +180,45 @@ public class ExtractAndNormalizeWindowExpression extends
OneRewriteRuleFactory i
inputSlots.stream()
).distinct();
}
+
+ // for this sql:
+ // select
+ // SUBSTR(orderdate,1,10) AS dt,
+ // ROW_NUMBER() OVER(PARTITION BY orderdate ORDER BY
orderid DESC) AS rn
+ // from lineorders
+ // having dt = '2025-01-01'
+ //
+ // we not push down the `dt` slot under LogicalWindow, but
push down [orderdate, orderid]
+ // to the bottom projects, because if we push down `dt`, the
plan tree will be:
+ //
+ // LogicalFilter(substr(dt#3, 1, 10) =
'2025-01-01')
+ // |
+ // LogicalWindow(rowNumber(partition by orderdate#2,
order by orderid#1))
+ // |
+ // LogicalProject(orderid#1, orderdate#2,
substr(orderdate#1, 1, 10) as dt#3)
+ //
+ // and can not push down filter by
`PushDownFilterThroughWindow`, causing inefficiency,
+ // because dt#3 in LogicalFilter not contains in the partition
key in LogicalWindow: [orderdate#2].
+ //
+ // so we only push down orderdate in the LogicalFilter, not
push down `dt`:
+ //
+ // LogicalFilter(substr(orderdate#2, 1, 10) =
'2025-01-01')
+ // |
+ // LogicalWindow(rowNumber(partition by orderdate#2,
order by orderid#1))
+ // |
+ // LogicalProject(orderid#1, orderdate#2)
+ //
+ // and then, `PushDownFilterThroughWindow` found the
LogicalFilter's `orderdate#2` contains
+ // in the LogicalWindow's partition key: [orderdate#2], and
can push down filter to:
+ //
+ // LogicalWindow(rowNumber(partition by orderdate#2, order
by orderid#1))
+ // |
+ // LogicalProject(orderid#1, orderdate#2)
+ // |
+ // LogicalFilter(substr(orderdate#2, 1, 10) = '2025-01-01')
+ if (expression instanceof Alias) {
+ return expression.getInputSlots().stream();
+ }
return ImmutableList.of(expression).stream();
})
.collect(ImmutableSet.toImmutableSet());
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
index 6edcd01a8ea..3c367550cfe 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughWindowTest.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.rules.rewrite;
+import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
@@ -35,6 +36,7 @@ import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanChecker;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.utframe.TestWithFeService;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -42,7 +44,7 @@ import org.junit.jupiter.api.Test;
import java.util.List;
-class PushDownFilterThroughWindowTest implements MemoPatternMatchSupported {
+class PushDownFilterThroughWindowTest extends TestWithFeService implements
MemoPatternMatchSupported {
private final LogicalOlapScan scan = new
LogicalOlapScan(StatementScopeIdGenerator.newRelationId(),
PlanConstructor.student,
ImmutableList.of(""));
@@ -89,4 +91,64 @@ class PushDownFilterThroughWindowTest implements
MemoPatternMatchSupported {
)
);
}
+
+ @Test
+ public void testPushDownFilter() throws Exception {
+ String db = "test";
+ createDatabase(db);
+ useDatabase(db);
+ createTable("CREATE TABLE lineorders (\n"
+ + "orderdate varchar(100) NOT NULL,\n"
+ + "orderid int NOT NULL,\n"
+ + "country_id int NOT NULL,\n"
+ + "vender_id int NOT NULL,\n"
+ + "ordernum int NOT NULL,\n"
+ + "ordemoney int NOT NULL\n"
+ + ") ENGINE=OLAP\n"
+ + "DUPLICATE KEY(orderdate, orderid, country_id)\n"
+ + "COMMENT 'OLAP'\n"
+ + "PARTITION BY LIST(orderdate)\n"
+ + "(PARTITION p1992 VALUES IN (\"0-2020\"),\n"
+ + "PARTITION p1993 VALUES IN (\"0-2021\"),\n"
+ + "PARTITION p1994 VALUES IN (\"0-2022\"),\n"
+ + "PARTITION p1995 VALUES IN (\"0-2023\"),\n"
+ + "PARTITION p1996 VALUES IN (\"0-2024\"),\n"
+ + "PARTITION p1997 VALUES IN (\"0-2025\"))\n"
+ + "DISTRIBUTED BY HASH(orderid) BUCKETS 48\n"
+ + "PROPERTIES (\n"
+ + "\"replication_allocation\" = \"tag.location.default: 1\"\n"
+ + ")");
+
+ connectContext.getSessionVariable()
+ .setDisableNereidsRules(
+ RuleType.OLAP_SCAN_PARTITION_PRUNE.name() + "," +
RuleType.PRUNE_EMPTY_PARTITION.name());
+
+ PlanChecker.from(connectContext)
+ .analyze("select * from ( \n"
+ + " select \n"
+ + " orderid,\n"
+ + " orderdate,\n"
+ + " country_id,\n"
+ + " ordernum,\n"
+ + " ordemoney,\n"
+ + " SUBSTR(lineorders.orderdate,3,4) AS dt,\n"
+ + " ROW_NUMBER() OVER(PARTITION BY
lineorders.orderid,lineorders.orderdate ORDER BY lineorders.country_id DESC) AS
rn\n"
+ + " from lineorders\n"
+ + ") a \n"
+ + "where SUBSTR(a.dt, 1, 4) = SUBSTR(curdate(), 1, 4)")
+ .rewrite()
+ .matchesFromRoot(
+ logicalResultSink(
+ logicalProject(
+ logicalWindow(
+ logicalProject(
+ logicalFilter(
+
logicalOlapScan()
+ )
+ )
+ )
+ )
+ )
+ );
+ }
}
diff --git a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out
b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out
index 0bbae0dc25f..edcbfcb6022 100644
Binary files a/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out
and b/regression-test/data/nereids_p0/cte/test_cte_filter_pushdown.out differ
diff --git
a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out
b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out
index 0bbae0dc25f..edcbfcb6022 100644
Binary files
a/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out and
b/regression-test/data/nereids_rules_p0/cte/test_cte_filter_pushdown.out differ
diff --git
a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out
b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out
index 0a14dd5f07c..bb38206fc48 100644
Binary files
a/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out and
b/regression-test/data/nereids_tpch_p0/tpch/push_filter_window_eqset.out differ
diff --git
a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out
b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files
a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out and
b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query51.out differ
diff --git
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out and
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files
a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out and
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query51.out
b/regression-test/data/shape_check/tpcds_sf100/shape/query51.out
index 470fabc0f31..cec684574ed 100644
Binary files a/regression-test/data/shape_check/tpcds_sf100/shape/query51.out
and b/regression-test/data/shape_check/tpcds_sf100/shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out
b/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out
index a10c39885ad..ad962e7d114 100644
Binary files a/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out
and b/regression-test/data/shape_check/tpcds_sf1000/hint/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out
b/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out
index 6c22d2df308..98b43bfdfc1 100644
Binary files a/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out
and b/regression-test/data/shape_check/tpcds_sf1000/shape/query51.out differ
diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out
index 5635b5dbabf..5d049b5a6bb 100644
Binary files
a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out and
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query51.out differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]