This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 2e8a2a69411 [Feat](nereids) add transform rule SimplifyWindowExpression (#33647) 2e8a2a69411 is described below commit 2e8a2a6941153acd9711e3976f88aefc3c4d7c3d Author: feiniaofeiafei <53502832+feiniaofeia...@users.noreply.github.com> AuthorDate: Wed Apr 24 16:11:13 2024 +0800 [Feat](nereids) add transform rule SimplifyWindowExpression (#33647) rewrite func(para) over (partition by unique_keys) 1. func() is count(non-null) or rank/dense_rank/row_number -> 1 2. func(para) is min/max/sum/avg/first_value/last_value -> para e.g select max(c1) over(partition by pk) from t1; -> select c1 from t1; --- .../doris/nereids/jobs/executor/Rewriter.java | 4 +- .../org/apache/doris/nereids/rules/RuleType.java | 1 + .../rules/rewrite/SimplifyWindowExpression.java | 123 ++++++++++ .../doris/nereids/properties/UniqueTest.java | 2 +- .../simplify_window_expression.out | 254 +++++++++++++++++++++ .../shape/query70.out | 36 ++- .../noStatsRfPrune/query70.out | 36 ++- .../no_stats_shape/query70.out | 35 ++- .../rf_prune/query70.out | 36 ++- .../nereids_tpcds_shape_sf100_p0/shape/query70.out | 35 ++- .../simplify_window_expression.groovy | 110 +++++++++ 11 files changed, 567 insertions(+), 105 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 2361c276372..24669f40ff8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -121,6 +121,7 @@ import org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion; import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows; import org.apache.doris.nereids.rules.rewrite.ReorderJoin; import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren; +import 
org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression; import org.apache.doris.nereids.rules.rewrite.SplitLimit; import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite; import org.apache.doris.nereids.rules.rewrite.TransposeSemiJoinAgg; @@ -226,7 +227,8 @@ public class Rewriter extends AbstractBatchJobExecutor { topic("Window analysis", topDown( new ExtractAndNormalizeWindowExpression(), - new CheckAndStandardizeWindowFunctionAndFrame() + new CheckAndStandardizeWindowFunctionAndFrame(), + new SimplifyWindowExpression() ) ), topic("Rewrite join", diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 696463523f6..4da8c36f89e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -96,6 +96,7 @@ public enum RuleType { NORMALIZE_SORT(RuleTypeClass.REWRITE), NORMALIZE_REPEAT(RuleTypeClass.REWRITE), EXTRACT_AND_NORMALIZE_WINDOW_EXPRESSIONS(RuleTypeClass.REWRITE), + SIMPLIFY_WINDOW_EXPRESSION(RuleTypeClass.REWRITE), CHECK_AND_STANDARDIZE_WINDOW_FUNCTION_AND_FRAME(RuleTypeClass.REWRITE), CHECK_MATCH_EXPRESSION(RuleTypeClass.REWRITE), CREATE_PARTITION_TOPN_FOR_WINDOW(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java new file mode 100644 index 00000000000..872ca789818 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.annotation.DependsRules; +import org.apache.doris.nereids.pattern.MatchingContext; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.WindowExpression; +import org.apache.doris.nereids.trees.expressions.functions.BoundFunction; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * rewrite func(para) over (partition by unique_keys) + * 1. func() is count(non-null) or rank/dense_rank/row_number -> 1 + * 2. 
func(para) is min/max/sum/avg/first_value/last_value -> para
+ * e.g
+ * select max(c1) over(partition by pk) from t1;
+ * -> select c1 from t1;
+ * */
+@DependsRules({
+    ExtractAndNormalizeWindowExpression.class
+})
+public class SimplifyWindowExpression extends OneRewriteRuleFactory {
+    private static final String COUNT = "count";
+    // Functions replaced by the constant 1 once the partition keys are reported unique.
+    private static final ImmutableSet<String> REWRITE_TO_CONST_WINDOW_FUNCTIONS =
+            ImmutableSet.of("rank", "dense_rank", "row_number");
+    // Functions replaced by their own first argument once the partition keys are reported unique.
+    private static final ImmutableSet<String> REWRITE_TO_SLOT_WINDOW_FUNCTIONS =
+            ImmutableSet.of("min", "max", "sum", "avg", "first_value", "last_value");
+
+    /**
+     * Builds the rule: it matches any LogicalWindow and applies {@link #simplify}.
+     */
+    @Override
+    public Rule build() {
+        return logicalWindow(any()).thenApply(this::simplify)
+                .toRule(RuleType.SIMPLIFY_WINDOW_EXPRESSION);
+    }
+
+    /**
+     * Splits the window expressions into those that can be replaced by a plain projection
+     * and those that must stay in the window, then rebuilds the plan.
+     *
+     * @param ctx matching context whose root is the LogicalWindow to simplify
+     * @return the original window when nothing is rewritten; otherwise a LogicalProject placed
+     *         on the window's child (everything rewritten) or on a window that keeps only the
+     *         remaining expressions
+     */
+    private Plan simplify(MatchingContext<LogicalWindow<Plan>> ctx) {
+        LogicalWindow<Plan> window = ctx.root;
+        ImmutableList.Builder<NamedExpression> projectionsBuilder = ImmutableList.builder();
+        ImmutableList.Builder<NamedExpression> remainWindowExpression = ImmutableList.builder();
+        for (NamedExpression expr : window.getWindowExpressions()) {
+            Alias alias = (Alias) expr;
+            WindowExpression windowExpression = (WindowExpression) alias.child();
+            Expression function = windowExpression.getFunction();
+            // Every expression that is not rewritten has to stay in the window. Skipping it
+            // (as was done for metric-only partition keys) silently removes its output slot.
+            if (!(function instanceof BoundFunction)
+                    || !isPartitionedByUniqueSlots(window, windowExpression)) {
+                remainWindowExpression.add(expr);
+                continue;
+            }
+            BoundFunction boundFunction = (BoundFunction) function;
+            String name = boundFunction.getName();
+            // A count without arguments has no child(0); guard before reading it so the rule
+            // does not fail with an index error. Such a count is simply left in the window.
+            boolean countOfNonNull = name.equals(COUNT)
+                    && !boundFunction.children().isEmpty()
+                    && boundFunction.child(0).notNullable();
+            // NOTE(review): the replacement keeps the alias id and name, but its data type is
+            // that of the TINYINT literal / the raw argument - confirm this matches the type
+            // of the original window function.
+            if (countOfNonNull || REWRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)) {
+                projectionsBuilder.add(new Alias(alias.getExprId(), new TinyIntLiteral((byte) 1), alias.getName()));
+            } else if (REWRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(name)) {
+                projectionsBuilder.add(new Alias(alias.getExprId(), boundFunction.child(0), alias.getName()));
+            } else {
+                remainWindowExpression.add(expr);
+            }
+        }
+        List<NamedExpression> projections = projectionsBuilder.build();
+        if (projections.isEmpty()) {
+            return window;
+        }
+        List<NamedExpression> remainWindows = remainWindowExpression.build();
+        Plan windowChild = window.child(0);
+        // Forward the child's slots in both cases, so the mixed case exposes them exactly
+        // like the all-rewritten case does.
+        List<NamedExpression> finalProjections = Lists.newArrayList(projections);
+        finalProjections.addAll(windowChild.getOutput());
+        if (remainWindows.isEmpty()) {
+            return new LogicalProject<>(finalProjections, windowChild);
+        }
+        for (NamedExpression remainWindow : remainWindows) {
+            finalProjections.add(remainWindow.toSlot());
+        }
+        return new LogicalProject<>(finalProjections, window.withExpression(remainWindows, windowChild));
+    }
+
+    /**
+     * Returns true when every partition key is a non-metric slot and the window's functional
+     * dependencies report that set of slots as unique.
+     */
+    private boolean isPartitionedByUniqueSlots(LogicalWindow<Plan> window, WindowExpression windowExpression) {
+        Set<Slot> partitionSlotSet = new HashSet<>();
+        for (Expression partitionKey : windowExpression.getPartitionKeys()) {
+            // after normalize window, partition key must be slot; anything else is not rewritten
+            if (!(partitionKey instanceof Slot) || partitionKey.getDataType().isOnlyMetricType()) {
+                return false;
+            }
+            partitionSlotSet.add((Slot) partitionKey);
+        }
+        return window.getLogicalProperties().getFunctionalDependencies().isUnique(partitionSlotSet);
+    }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java index 27d64ad186c..fb57098421b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java @@ -363,7 +363,7 @@ class UniqueTest extends TestWithFeService { void testWindow() { // partition by uniform Plan plan = PlanChecker.from(connectContext) - .analyze("select id, row_number() over(partition by id) from agg where id =1") +
.analyze("select id, row_number() over(partition by name) from agg where name ='d'") .rewrite() .getPlan(); Assertions.assertTrue(plan.getLogicalProperties() diff --git a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out new file mode 100644 index 00000000000..3befc3dcbb2 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out @@ -0,0 +1,254 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_count_col -- +\N 0 0 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +3 1 1 +3 1 1 +4 1 1 +6 1 1 +6 1 1 + +-- !select_rank -- +\N 1 1 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +3 1 1 +3 1 1 +4 1 1 +6 1 1 +6 1 1 + +-- !select_dense_rank -- +\N 1 1 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +3 1 1 +3 1 1 +4 1 1 +6 1 1 +6 1 1 + +-- !select_row_number -- +\N 1 1 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +3 1 1 +3 1 1 +4 1 1 +6 1 1 +6 1 1 + +-- !select_first_value -- +\N \N \N +1 1 1 +1 1 1 +2 2 2 +2 2 2 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +6 6 6 +6 6 6 + +-- !select_last_value -- +\N \N \N +1 1 1 +1 1 1 +2 2 2 +2 2 2 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +6 6 6 +6 6 6 + +-- !select_min -- +\N \N \N +\N \N \N +1 1 1 +1 1 1 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +5 5 5 +5 5 5 +7 7 7 + +-- !select_max -- +\N \N \N +\N \N \N +1 1 1 +1 1 1 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +5 5 5 +5 5 5 +7 7 7 + +-- !select_sum -- +\N \N \N +1 1 1 +1 1 1 +2 2 2 +2 2 2 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +6 6 6 +6 6 6 + +-- !select_avg -- +\N \N \N +\N \N \N +1 1 1 +1 1 1 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +5 5 5 +5 5 5 +7 7 7 + +-- !more_than_pk -- +\N \N \N +\N \N \N +1 1 1 +1 1 1 +2 2 2 +3 3 3 +3 3 3 +4 4 4 +5 5 5 +5 5 5 +7 7 7 + +-- !select_last_value_shape -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----PhysicalProject +------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) 
+--------PhysicalOlapScan[mal_test_simplify_window] + +-- !select_min_shape -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----PhysicalProject +------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[mal_test_simplify_window] + +-- !more_than_pk_shape -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----PhysicalProject +------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[mal_test_simplify_window] + +-- !select_upper_plan_use_all_rewrite -- +\N \N +\N \N +1 1 +1 1 +2 2 +3 3 +3 3 +4 4 +5 5 +5 5 +7 7 + +-- !select_upper_plan_use_rewrite_and_not_rewrite -- +\N \N \N +\N \N 7 +1 1 3 +1 1 4 +2 2 5 +3 3 3 +3 3 4 +4 4 4 +5 5 5 +5 5 5 +7 7 7 + +-- !select_upper_plan_use_all_not_rewrite -- +\N \N +\N 7 +1 3 +1 4 +2 5 +3 3 +3 4 +4 4 +5 5 +5 5 +7 7 + +-- !select_upper_plan_use_all_rewrite_shape -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[mal_test_simplify_window] + +-- !select_upper_plan_use_rewrite_and_not_rewrite_shape -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +------------------PhysicalOlapScan[mal_test_simplify_window] + +-- !select_upper_plan_use_all_not_rewrite_shape -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject 
+----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +------------------PhysicalOlapScan[mal_test_simplify_window] + +-- !window_agg -- +\N 1 +1 1 +2 1 +3 1 +4 1 +6 1 + +-- !window_agg_shape -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----PhysicalProject +------hashAgg[LOCAL] +--------PhysicalProject +----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[mal_test_simplify_window] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out index 35ca9710459..fdbfa12a4b7 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out @@ -30,24 +30,18 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[store] apply RFs: RF2 ------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------PhysicalProject -----------------------------------------filter((ranking <= 5)) -------------------------------------------PhysicalWindow ---------------------------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------------------------PhysicalPartitionTopN -------------------------------------------------PhysicalProject ---------------------------------------------------hashAgg[GLOBAL] -----------------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------------hashAgg[LOCAL] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] -------------------------------------------------------------hashJoin[INNER_JOIN] 
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------filter((date_dim.d_month_seq <= 1231) and (date_dim.d_month_seq >= 1220)) ---------------------------------------------------------------------PhysicalOlapScan[date_dim] -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store] - +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +--------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] 
+------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((date_dim.d_month_seq <= 1231) and (date_dim.d_month_seq >= 1220)) +----------------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out index d919942139d..abdcd1b0149 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out @@ -27,27 +27,21 @@ PhysicalResultSink --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((store.s_state = tmp1.s_state)) otherCondition=() ------------------------------------PhysicalProject ---------------------------------------filter((ranking <= 5)) -----------------------------------------PhysicalWindow -------------------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------------------PhysicalPartitionTopN -----------------------------------------------PhysicalProject -------------------------------------------------hashAgg[GLOBAL] ---------------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------------hashAgg[LOCAL] -------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
-----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) -------------------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------PhysicalOlapScan[store] +--------------------------------------hashAgg[GLOBAL] +----------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------hashAgg[LOCAL] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] 
+----------------------------------------------------PhysicalProject +------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) +--------------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[store] ------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[store] - diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out index c0383fa20db..a5776a3d5e1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out @@ -27,26 +27,21 @@ PhysicalResultSink --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((store.s_state = tmp1.s_state)) otherCondition=() build RFs:RF2 s_state->[s_state] ------------------------------------PhysicalProject ---------------------------------------filter((ranking <= 5)) -----------------------------------------PhysicalWindow -------------------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------------------PhysicalPartitionTopN -----------------------------------------------PhysicalProject -------------------------------------------------hashAgg[GLOBAL] ---------------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------------hashAgg[LOCAL] 
-------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] -----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) -------------------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------------------PhysicalProject ---------------------------------------------------------------PhysicalOlapScan[store] apply RFs: RF2 +--------------------------------------hashAgg[GLOBAL] +----------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------hashAgg[LOCAL] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) +--------------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[store] apply RFs: RF2 ------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out index 93610210333..f3e524aabcf 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out @@ -30,24 +30,18 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[store] apply RFs: RF2 ------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------PhysicalProject -----------------------------------------filter((ranking <= 5)) -------------------------------------------PhysicalWindow ---------------------------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------------------------PhysicalPartitionTopN -------------------------------------------------PhysicalProject 
---------------------------------------------------hashAgg[GLOBAL] -----------------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------------hashAgg[LOCAL] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() -------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 ---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) ---------------------------------------------------------------------PhysicalOlapScan[date_dim] -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store] - +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() 
+--------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) +----------------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out index f3e8b1c3a72..38de6f0c4ed 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out @@ -30,24 +30,19 @@ PhysicalResultSink --------------------------------------PhysicalOlapScan[store] apply RFs: RF2 ------------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------------PhysicalProject -----------------------------------------filter((ranking <= 5)) -------------------------------------------PhysicalWindow ---------------------------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------------------------PhysicalPartitionTopN -------------------------------------------------PhysicalProject ---------------------------------------------------hashAgg[GLOBAL] 
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------------------hashAgg[LOCAL] ---------------------------------------------------------PhysicalProject -----------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] -------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------------------------------------PhysicalProject -------------------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) ---------------------------------------------------------------------PhysicalOlapScan[date_dim] -------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------------------PhysicalProject -----------------------------------------------------------------PhysicalOlapScan[store] +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] 
+--------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((date_dim.d_month_seq <= 1224) and (date_dim.d_month_seq >= 1213)) +----------------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[store] diff --git a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy new file mode 100644 index 00000000000..11ad672c74f --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("simplify_window_expression") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql """ + DROP TABLE IF EXISTS mal_test_simplify_window + """ + + sql """ + create table mal_test_simplify_window(a int, b int, c int) unique key(a,b) distributed by hash(a) buckets 10 + properties('replication_num' = '1'); + """ + + sql """ + insert into mal_test_simplify_window values(6,null,6),(4,5,6),(1,1,4) + ,(6,7,1),(2,1,7),(2,4,2),(2,3,9),(1,3,6),(3,5,8),(3,2,8),(null,null,3); + """ + + sql "sync" + + qt_select_count_col """ + select a,count(a) over (partition by a,b) c1, count(a) over (partition by a,b order by a) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_rank """ + select a,rank() over (partition by a,b) c1, rank() over (partition by a,b order by b) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_dense_rank """ + select a,dense_rank() over (partition by a,b) c1, dense_rank() over (partition by a,b order by a,b) c1 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_row_number """ + select a,row_number() over (partition by a,b) c1, row_number() over (partition by a,b order by 1) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_first_value """ + select a,first_value(a) over (partition by a,b) c1, first_value(a) over (partition by a,b order by a) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_last_value """ + select a,last_value(a) over (partition by a,b) c1,last_value(a) over (partition by a,b order by b) c2 + from 
mal_test_simplify_window order by 1,2,3;""" + qt_select_min """ + select b,min(b) over (partition by a,b) c1, min(b) over (partition by a,b order by a,b) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_max """ + select b,max(b) over (partition by a,b) c1,max(b) over (partition by a,b order by a,b) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_sum """ + select a,sum(a) over (partition by a,b) c1, sum(a) over (partition by a,b order by a) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_select_avg """ + select b, avg(b) over (partition by a,b) c1, avg(b) over (partition by a,b order by b) c2 + from mal_test_simplify_window order by 1,2,3;""" + qt_more_than_pk """ + select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition by a,b,c order by b) c2 + from mal_test_simplify_window order by 1,2,3;""" + + qt_select_last_value_shape """explain shape plan + select a,last_value(a) over (partition by a,b) c1,last_value(a) over (partition by a,b order by b) c2 + from mal_test_simplify_window""" + qt_select_min_shape """explain shape plan + select b,min(b) over (partition by a,b) c1, min(b) over (partition by a,b order by a,b) c2 + from mal_test_simplify_window""" + qt_more_than_pk_shape """ + explain shape plan + select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition by a,b,c order by b) c2 + from mal_test_simplify_window""" + + qt_select_upper_plan_use_all_rewrite """ + select b, c1 from (select b,avg(b) over (partition by a,b) c1 + from mal_test_simplify_window) t order by 1,2""" + qt_select_upper_plan_use_rewrite_and_not_rewrite """ + select b, c1, c2 from (select b,sum(b) over (partition by a,b) c1, max(b) over (partition by a order by a) c2 + from mal_test_simplify_window) t order by 1,2,3 """ + qt_select_upper_plan_use_all_not_rewrite """ + select b, c2 from (select b, max(b) over (partition by a order by a) c2 + from mal_test_simplify_window) t order by 1,2 """ + 
qt_select_upper_plan_use_all_rewrite_shape """ + explain shape plan select b, c1 from (select b,avg(b) over (partition by a,b) c1 + from mal_test_simplify_window) t order by 1,2""" + qt_select_upper_plan_use_rewrite_and_not_rewrite_shape """ + explain shape plan select b, c1, c2 from (select b,sum(b) over (partition by a,b) c1, max(b) over (partition by a order by a) c2 + from mal_test_simplify_window) t order by 1,2,3 """ + qt_select_upper_plan_use_all_not_rewrite_shape """ + explain shape plan select b, c2 from (select b, max(b) over (partition by a order by a) c2 + from mal_test_simplify_window) t order by 1,2 """ + + qt_window_agg """ + select a, rank() over (partition by a order by sum(b) desc) as ranking + from mal_test_simplify_window group by a order by 1,2; + """ + qt_window_agg_shape """ + explain shape plan + select a, rank() over (partition by a order by sum(b) desc) as ranking + from mal_test_simplify_window group by a; + """ +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org