This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e8a2a69411 [Feat](nereids) add transform rule 
SimplifyWindowExpression (#33647)
2e8a2a69411 is described below

commit 2e8a2a6941153acd9711e3976f88aefc3c4d7c3d
Author: feiniaofeiafei <53502832+feiniaofeia...@users.noreply.github.com>
AuthorDate: Wed Apr 24 16:11:13 2024 +0800

    [Feat](nereids) add transform rule SimplifyWindowExpression (#33647)
    
    Rewrite func(para) over (partition by unique_keys), where each partition holds a single row:
    1. func() is count(non-null) or rank/dense_rank/row_number -> 1
    2. func(para) is min/max/sum/avg/first_value/last_value -> para
    e.g.
    select max(c1) over(partition by pk) from t1;
    -> select c1 from t1;
---
 .../doris/nereids/jobs/executor/Rewriter.java      |   4 +-
 .../org/apache/doris/nereids/rules/RuleType.java   |   1 +
 .../rules/rewrite/SimplifyWindowExpression.java    | 123 ++++++++++
 .../doris/nereids/properties/UniqueTest.java       |   2 +-
 .../simplify_window_expression.out                 | 254 +++++++++++++++++++++
 .../shape/query70.out                              |  36 ++-
 .../noStatsRfPrune/query70.out                     |  36 ++-
 .../no_stats_shape/query70.out                     |  35 ++-
 .../rf_prune/query70.out                           |  36 ++-
 .../nereids_tpcds_shape_sf100_p0/shape/query70.out |  35 ++-
 .../simplify_window_expression.groovy              | 110 +++++++++
 11 files changed, 567 insertions(+), 105 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 2361c276372..24669f40ff8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -121,6 +121,7 @@ import 
org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion;
 import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows;
 import org.apache.doris.nereids.rules.rewrite.ReorderJoin;
 import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren;
+import org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression;
 import org.apache.doris.nereids.rules.rewrite.SplitLimit;
 import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite;
 import org.apache.doris.nereids.rules.rewrite.TransposeSemiJoinAgg;
@@ -226,7 +227,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
             topic("Window analysis",
                     topDown(
                             new ExtractAndNormalizeWindowExpression(),
-                            new CheckAndStandardizeWindowFunctionAndFrame()
+                            new CheckAndStandardizeWindowFunctionAndFrame(),
+                            new SimplifyWindowExpression()
                     )
             ),
             topic("Rewrite join",
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 696463523f6..4da8c36f89e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -96,6 +96,7 @@ public enum RuleType {
     NORMALIZE_SORT(RuleTypeClass.REWRITE),
     NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
     EXTRACT_AND_NORMALIZE_WINDOW_EXPRESSIONS(RuleTypeClass.REWRITE),
+    SIMPLIFY_WINDOW_EXPRESSION(RuleTypeClass.REWRITE),
     CHECK_AND_STANDARDIZE_WINDOW_FUNCTION_AND_FRAME(RuleTypeClass.REWRITE),
     CHECK_MATCH_EXPRESSION(RuleTypeClass.REWRITE),
     CREATE_PARTITION_TOPN_FOR_WINDOW(RuleTypeClass.REWRITE),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
new file mode 100644
index 00000000000..872ca789818
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.annotation.DependsRules;
+import org.apache.doris.nereids.pattern.MatchingContext;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.WindowExpression;
+import org.apache.doris.nereids.trees.expressions.functions.BoundFunction;
+import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Simplifies window functions whose partition keys are unique, so that each partition is treated as
+ * holding a single row. For func(para) over (partition by unique_keys):
+ * 1. count(*), count(non-null), rank, dense_rank and row_number are rewritten to the literal 1
+ * 2. min/max/sum/avg/first_value/last_value are rewritten to their first argument
+ * e.g.
+ * select max(c1) over(partition by pk) from t1;
+ * -> select c1 from t1;
+ */
+@DependsRules({
+        ExtractAndNormalizeWindowExpression.class
+})
+public class SimplifyWindowExpression extends OneRewriteRuleFactory {
+    private static final String COUNT = "count";
+    private static final ImmutableSet<String> REWRITE_TO_CONST_WINDOW_FUNCTIONS =
+            ImmutableSet.of("rank", "dense_rank", "row_number");
+    private static final ImmutableSet<String> REWRITE_TO_SLOT_WINDOW_FUNCTIONS =
+            ImmutableSet.of("min", "max", "sum", "avg", "first_value", "last_value");
+
+    @Override
+    public Rule build() {
+        return logicalWindow(any()).thenApply(this::simplify)
+                .toRule(RuleType.SIMPLIFY_WINDOW_EXPRESSION);
+    }
+
+    /**
+     * Splits the window expressions into those that can be replaced by a plain projection and those
+     * that must stay in the window node.
+     *
+     * @param ctx matching context whose root is the LogicalWindow to simplify
+     * @return the original window when nothing can be simplified; otherwise a LogicalProject placed on
+     *         top of either the window's child (everything was rewritten) or a window that only keeps
+     *         the expressions which could not be rewritten
+     */
+    private Plan simplify(MatchingContext<LogicalWindow<Plan>> ctx) {
+        LogicalWindow<Plan> window = ctx.root;
+        ImmutableList.Builder<NamedExpression> projectionsBuilder = ImmutableList.builder();
+        ImmutableList.Builder<NamedExpression> remainWindowExpression = ImmutableList.builder();
+        for (NamedExpression expr : window.getWindowExpressions()) {
+            Alias alias = (Alias) expr;
+            WindowExpression windowExpression = (WindowExpression) alias.child();
+            if (windowExpression.getPartitionKeys().stream().anyMatch(
+                    partitionKey -> partitionKey.getDataType().isOnlyMetricType())) {
+                // Not simplifiable, but the expression must be kept: skipping it without recording it
+                // would drop its output whenever another window expression is rewritten.
+                remainWindowExpression.add(expr);
+                continue;
+            }
+            // after normalize window, partition key must be slot
+            List<Slot> partitionSlots = (List<Slot>) (List) windowExpression.getPartitionKeys();
+            Set<Slot> partitionSlotSet = new HashSet<>(partitionSlots);
+            if (!window.getLogicalProperties().getFunctionalDependencies().isUnique(partitionSlotSet)) {
+                remainWindowExpression.add(expr);
+                continue;
+            }
+            Expression function = windowExpression.getFunction();
+            if (!(function instanceof BoundFunction)) {
+                remainWindowExpression.add(expr);
+                continue;
+            }
+            BoundFunction boundFunction = (BoundFunction) function;
+            String name = boundFunction.getName();
+            if (REWRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)
+                    || (name.equals(COUNT) && countsEveryRow(boundFunction))) {
+                // NOTE(review): the literal is TINYINT while the replaced function may have another
+                // result type -- confirm that consumers of this expr id tolerate the change.
+                projectionsBuilder.add(new Alias(alias.getExprId(), new TinyIntLiteral((byte) 1), alias.getName()));
+            } else if (REWRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(name)) {
+                // NOTE(review): the argument's type may differ from the function's result type
+                // (e.g. sum/avg) -- confirm whether a cast is required here.
+                projectionsBuilder.add(new Alias(alias.getExprId(), boundFunction.child(0), alias.getName()));
+            } else {
+                remainWindowExpression.add(expr);
+            }
+        }
+        List<NamedExpression> projections = projectionsBuilder.build();
+        List<NamedExpression> remainWindows = remainWindowExpression.build();
+        if (projections.isEmpty()) {
+            return window;
+        }
+        Plan windowChild = window.child(0);
+        // Always pass the child's slots through, so that plans above which reference them keep
+        // resolving no matter how many window expressions were rewritten.
+        List<NamedExpression> finalProjections = Lists.newArrayList(projections);
+        finalProjections.addAll(windowChild.getOutput());
+        if (remainWindows.isEmpty()) {
+            return new LogicalProject<>(finalProjections, windowChild);
+        }
+        for (NamedExpression remainWindow : remainWindows) {
+            finalProjections.add(remainWindow.toSlot());
+        }
+        return new LogicalProject<>(finalProjections, window.withExpression(remainWindows, windowChild));
+    }
+
+    /**
+     * Tells whether a count function counts every input row: either it has no argument at all
+     * (count(*)) or its first argument is not nullable. The argument-less case is checked first so
+     * that child(0) is never called on a function without children.
+     */
+    private boolean countsEveryRow(BoundFunction count) {
+        return count.children().isEmpty() || count.child(0).notNullable();
+    }
+}
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
index 27d64ad186c..fb57098421b 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
@@ -363,7 +363,7 @@ class UniqueTest extends TestWithFeService {
     void testWindow() {
         // partition by uniform
         Plan plan = PlanChecker.from(connectContext)
-                .analyze("select id, row_number() over(partition by id) from 
agg where id =1")
+                .analyze("select id, row_number() over(partition by name) from 
agg where name ='d'")
                 .rewrite()
                 .getPlan();
         Assertions.assertTrue(plan.getLogicalProperties()
diff --git 
a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
 
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
new file mode 100644
index 00000000000..3befc3dcbb2
--- /dev/null
+++ 
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
@@ -0,0 +1,254 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_count_col --
+\N     0       0
+1      1       1
+1      1       1
+2      1       1
+2      1       1
+2      1       1
+3      1       1
+3      1       1
+4      1       1
+6      1       1
+6      1       1
+
+-- !select_rank --
+\N     1       1
+1      1       1
+1      1       1
+2      1       1
+2      1       1
+2      1       1
+3      1       1
+3      1       1
+4      1       1
+6      1       1
+6      1       1
+
+-- !select_dense_rank --
+\N     1       1
+1      1       1
+1      1       1
+2      1       1
+2      1       1
+2      1       1
+3      1       1
+3      1       1
+4      1       1
+6      1       1
+6      1       1
+
+-- !select_row_number --
+\N     1       1
+1      1       1
+1      1       1
+2      1       1
+2      1       1
+2      1       1
+3      1       1
+3      1       1
+4      1       1
+6      1       1
+6      1       1
+
+-- !select_first_value --
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+2      2       2
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+6      6       6
+6      6       6
+
+-- !select_last_value --
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+2      2       2
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+6      6       6
+6      6       6
+
+-- !select_min --
+\N     \N      \N
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+5      5       5
+5      5       5
+7      7       7
+
+-- !select_max --
+\N     \N      \N
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+5      5       5
+5      5       5
+7      7       7
+
+-- !select_sum --
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+2      2       2
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+6      6       6
+6      6       6
+
+-- !select_avg --
+\N     \N      \N
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+5      5       5
+5      5       5
+7      7       7
+
+-- !more_than_pk --
+\N     \N      \N
+\N     \N      \N
+1      1       1
+1      1       1
+2      2       2
+3      3       3
+3      3       3
+4      4       4
+5      5       5
+5      5       5
+7      7       7
+
+-- !select_last_value_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_min_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !more_than_pk_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_all_rewrite --
+\N     \N
+\N     \N
+1      1
+1      1
+2      2
+3      3
+3      3
+4      4
+5      5
+5      5
+7      7
+
+-- !select_upper_plan_use_rewrite_and_not_rewrite --
+\N     \N      \N
+\N     \N      7
+1      1       3
+1      1       4
+2      2       5
+3      3       3
+3      3       4
+4      4       4
+5      5       5
+5      5       5
+7      7       7
+
+-- !select_upper_plan_use_all_not_rewrite --
+\N     \N
+\N     7
+1      3
+1      4
+2      5
+3      3
+3      4
+4      4
+5      5
+5      5
+7      7
+
+-- !select_upper_plan_use_all_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_rewrite_and_not_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------PhysicalWindow
+------------PhysicalQuickSort[LOCAL_SORT]
+--------------PhysicalProject
+----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_all_not_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------PhysicalWindow
+------------PhysicalQuickSort[LOCAL_SORT]
+--------------PhysicalProject
+----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !window_agg --
+\N     1
+1      1
+2      1
+3      1
+4      1
+6      1
+
+-- !window_agg_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[mal_test_simplify_window]
+
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
index 35ca9710459..fdbfa12a4b7 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
@@ -30,24 +30,18 @@ PhysicalResultSink
 --------------------------------------PhysicalOlapScan[store] apply RFs: RF2
 
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
 --------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
-------------------------------------------------------------hashJoin[INNER_JOIN]
 hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
 <= 1231) and (date_dim.d_month_seq >= 1220))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
-
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
+--------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
 <= 1231) and (date_dim.d_month_seq >= 1220))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
index d919942139d..abdcd1b0149 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
@@ -27,27 +27,21 @@ PhysicalResultSink
 --------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((store.s_state = tmp1.s_state)) otherCondition=()
 ------------------------------------PhysicalProject
---------------------------------------filter((ranking <= 5))
-----------------------------------------PhysicalWindow
-------------------------------------------PhysicalQuickSort[LOCAL_SORT]
---------------------------------------------PhysicalPartitionTopN
-----------------------------------------------PhysicalProject
-------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
-----------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
-------------------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store]
+--------------------------------------hashAgg[GLOBAL]
+----------------------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------------------hashAgg[LOCAL]
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
+------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
+--------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store]
 ------------------------------------PhysicalDistribute[DistributionSpecHash]
 --------------------------------------PhysicalProject
 ----------------------------------------PhysicalOlapScan[store]
-
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
index c0383fa20db..a5776a3d5e1 100644
--- 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
+++ 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
@@ -27,26 +27,21 @@ PhysicalResultSink
 --------------------------------PhysicalDistribute[DistributionSpecReplicated]
 ----------------------------------hashJoin[RIGHT_SEMI_JOIN] 
hashCondition=((store.s_state = tmp1.s_state)) otherCondition=() build RFs:RF2 
s_state->[s_state]
 ------------------------------------PhysicalProject
---------------------------------------filter((ranking <= 5))
-----------------------------------------PhysicalWindow
-------------------------------------------PhysicalQuickSort[LOCAL_SORT]
---------------------------------------------PhysicalPartitionTopN
-----------------------------------------------PhysicalProject
-------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
-----------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
-------------------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store]
 apply RFs: RF2
+--------------------------------------hashAgg[GLOBAL]
+----------------------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------------------hashAgg[LOCAL]
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
+------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
+--------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store] 
apply RFs: RF2
 ------------------------------------PhysicalDistribute[DistributionSpecHash]
 --------------------------------------PhysicalProject
 ----------------------------------------PhysicalOlapScan[store]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
index 93610210333..f3e524aabcf 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
@@ -30,24 +30,18 @@ PhysicalResultSink
 --------------------------------------PhysicalOlapScan[store] apply RFs: RF2
 
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
 --------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
-------------------------------------------------------------hashJoin[INNER_JOIN]
 hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
-
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
+--------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
index f3e8b1c3a72..38de6f0c4ed 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
@@ -30,24 +30,19 @@ PhysicalResultSink
 --------------------------------------PhysicalOlapScan[store] apply RFs: RF2
 
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
 --------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
-------------------------------------------------------------hashJoin[INNER_JOIN]
 hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
build RFs:RF1 s_store_sk->[ss_store_sk]
+--------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
 apply RFs: RF0 RF1
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
 <= 1224) and (date_dim.d_month_seq >= 1213))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
 
diff --git 
a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
 
b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
new file mode 100644
index 00000000000..11ad672c74f
--- /dev/null
+++ 
b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite for the Nereids SimplifyWindowExpression rewrite rule.
+// Every query runs a window function over mal_test_simplify_window, whose
+// unique key is (a,b); most of them partition by exactly that key, with and
+// without an ORDER BY inside the window.
+suite("simplify_window_expression") {
+    // Force the Nereids planner and forbid falling back to the legacy planner.
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+    sql """
+          DROP TABLE IF EXISTS mal_test_simplify_window
+         """
+
+    // Unique key is (a,b); c is a plain value column.
+    sql """
+         create table mal_test_simplify_window(a int, b int, c int) unique 
key(a,b) distributed by hash(a) buckets 10
+         properties('replication_num' = '1'); 
+         """
+
+    // The data includes NULLs in both key columns.
+    sql """
+         insert into mal_test_simplify_window values(6,null,6),(4,5,6),(1,1,4)
+        ,(6,7,1),(2,1,7),(2,4,2),(2,3,9),(1,3,6),(3,5,8),(3,2,8),(null,null,3);
+      """
+
+    sql "sync"
+
+    // Result checks: count and the ranking functions, partitioned by (a,b).
+    qt_select_count_col """
+        select a,count(a) over (partition by a,b) c1, count(a) over (partition 
by a,b order by a) c2 
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_rank """
+        select a,rank() over (partition by a,b) c1, rank() over (partition by 
a,b order by b) c2 
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_dense_rank """
+        select a,dense_rank() over (partition by a,b) c1, dense_rank() over 
(partition by a,b order by a,b) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_row_number """
+        select a,row_number() over (partition by a,b) c1, row_number() over 
(partition by a,b order by 1) c2 
+        from mal_test_simplify_window order by 1,2,3;"""
+    // Result checks: value-returning functions, partitioned by (a,b).
+    qt_select_first_value """
+        select a,first_value(a) over (partition by a,b) c1, first_value(a) 
over (partition by a,b order by a) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_last_value """
+        select a,last_value(a) over (partition by a,b) c1,last_value(a) over 
(partition by a,b order by b) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_min """
+        select b,min(b) over (partition by a,b) c1, min(b) over (partition by 
a,b order by a,b) c2
+         from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_max """
+        select b,max(b) over (partition by a,b) c1,max(b) over (partition by 
a,b order by a,b) c2
+         from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_sum """
+        select a,sum(a) over (partition by a,b) c1, sum(a) over (partition by 
a,b order by a) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+    qt_select_avg """
+        select b, avg(b) over (partition by a,b) c1, avg(b) over (partition by 
a,b order by b) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+    // Partition key (a,b,c) is a strict superset of the unique key (a,b).
+    qt_more_than_pk """
+        select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition 
by a,b,c order by b) c2
+        from mal_test_simplify_window order by 1,2,3;"""
+
+    // Plan-shape checks for a subset of the queries above; no ORDER BY is
+    // applied to the result because only the plan text is compared.
+    qt_select_last_value_shape """explain shape plan 
+        select a,last_value(a) over (partition by a,b) c1,last_value(a) over 
(partition by a,b order by b) c2
+        from mal_test_simplify_window"""
+    qt_select_min_shape """explain shape plan 
+        select b,min(b) over (partition by a,b) c1, min(b) over (partition by 
a,b order by a,b) c2
+         from mal_test_simplify_window"""
+    qt_more_than_pk_shape """
+        explain shape plan
+        select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition 
by a,b,c order by b) c2
+        from mal_test_simplify_window"""
+
+    // Window expressions inside a subquery whose outputs are consumed by an
+    // outer select. Queries partitioned by (a,b) use the full unique key;
+    // those partitioned by `a` alone do not, matching the "not_rewrite" names.
+    qt_select_upper_plan_use_all_rewrite """
+        select b, c1 from (select b,avg(b) over (partition by a,b) c1 
+        from mal_test_simplify_window) t order by 1,2"""
+    qt_select_upper_plan_use_rewrite_and_not_rewrite """
+        select b, c1, c2 from (select b,sum(b) over (partition by a,b) c1, 
max(b) over (partition by a order by a) c2
+        from mal_test_simplify_window) t order by 1,2,3 """
+    qt_select_upper_plan_use_all_not_rewrite """
+        select b, c2 from (select b, max(b) over (partition by a order by a) c2
+        from mal_test_simplify_window) t order by 1,2 """
+    qt_select_upper_plan_use_all_rewrite_shape """
+        explain shape plan select b, c1 from (select b,avg(b) over (partition 
by a,b) c1 
+        from mal_test_simplify_window) t order by 1,2"""
+    qt_select_upper_plan_use_rewrite_and_not_rewrite_shape """
+        explain shape plan select b, c1, c2 from (select b,sum(b) over 
(partition by a,b) c1, max(b) over (partition by a order by a) c2
+        from mal_test_simplify_window) t order by 1,2,3 """
+    qt_select_upper_plan_use_all_not_rewrite_shape """
+        explain shape plan select b, c2 from (select b, max(b) over (partition 
by a order by a) c2
+        from mal_test_simplify_window) t order by 1,2 """
+
+    // Window function over an aggregate: the partition column `a` is also the
+    // GROUP BY column.
+    // NOTE(review): presumably `a` is treated as unique after the GROUP BY so
+    // the rule can apply here too - confirm against the expected .out file.
+    qt_window_agg """
+        select a, rank() over (partition by a order by sum(b) desc) as ranking
+        from mal_test_simplify_window group by a order by 1,2;
+    """
+    qt_window_agg_shape """
+        explain shape plan
+        select a, rank() over (partition by a order by sum(b) desc) as ranking
+        from mal_test_simplify_window group by a;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to