This is an automated email from the ASF dual-hosted git repository. starocean999 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new deb4c285757 [improvement](mtmv) Not roll up when aggregate rewrite if roll up group by expr is uniform (#38387) deb4c285757 is described below commit deb4c285757f1b780864e0cf6865d376b1d3b7d1 Author: seawinde <149132972+seawi...@users.noreply.github.com> AuthorDate: Mon Aug 5 11:15:21 2024 +0800 [improvement](mtmv) Not roll up when aggregate rewrite if roll up group by expr is uniform (#38387) ## Proposed changes Not roll up when aggregate rewrite if roll up group by expr is uniform Such as mv name is mv3_0, and def is: ```sql CREATE MATERIALIZED VIEW mv3_0 BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS select o_orderdate, o_shippriority, o_comment, sum(o_totalprice) as sum_total, max(o_totalprice) as max_total, min(o_totalprice) as min_total, count(*) as count_all from orders group by o_orderdate, o_shippriority, o_comment; ``` query sql is as following: ```sql select o_comment, sum(o_totalprice), max(o_totalprice), min(o_totalprice), count(*) from orders where o_orderdate = '2023-12-09' and o_shippriority = 1 group by o_comment; ``` after rewrite the plan is as following, not need to add aggregate ``` PhysicalResultSink --filter((mv3_0.o_orderdate = '2023-12-09') and (mv3_0.o_shippriority = 1)) ----PhysicalOlapScan[mv3_0] ``` --- .../mv/AbstractMaterializedViewAggregateRule.java | 129 +++++- .../agg_optimize_when_uniform.out | 125 ++++++ .../agg_optimize_when_uniform.groovy | 485 +++++++++++++++++++++ .../agg_with_roll_up/aggregate_with_roll_up.groovy | 30 +- .../aggregate_without_roll_up.groovy | 94 ++-- 5 files changed, 774 insertions(+), 89 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java index 186f39ee35a..0a1c633cb34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.exploration.mv; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.jobs.executor.Rewriter; +import org.apache.doris.nereids.properties.DataTrait; import org.apache.doris.nereids.rules.analysis.NormalizeRepeat; import org.apache.doris.nereids.rules.exploration.mv.AbstractMaterializedViewAggregateRule.AggregateExpressionRewriteContext.ExpressionRewriteMode; import org.apache.doris.nereids.rules.exploration.mv.StructInfo.PlanCheckContext; @@ -45,6 +46,8 @@ import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewri import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.Repeat; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.visitor.ExpressionLineageReplacer; @@ -113,7 +116,7 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate boolean queryContainsGroupSets = queryAggregate.getSourceRepeat().isPresent(); // If group by expression between query and view is equals, try to rewrite expression directly if (!queryContainsGroupSets && isGroupByEquals(queryTopPlanAndAggPair, viewTopPlanAndAggPair, - viewToQuerySlotMapping, queryStructInfo, viewStructInfo, materializationContext, + viewToQuerySlotMapping, queryStructInfo, viewStructInfo, tempRewritedPlan, materializationContext, cascadesContext)) { List<Expression> rewrittenQueryExpressions = rewriteExpression(queryTopPlan.getOutput(), queryTopPlan, @@ -324,18 +327,21 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate SlotMapping viewToQuerySlotMapping, StructInfo queryStructInfo, StructInfo viewStructInfo, + Plan tempRewrittenPlan, MaterializationContext materializationContext, CascadesContext cascadesContext) { + + if (materializationContext instanceof SyncMaterializationContext) { + // For data correctness, should always add aggregate node if rewritten by sync materialized view + return false; + } Plan queryTopPlan = queryTopPlanAndAggPair.key(); Plan viewTopPlan = viewTopPlanAndAggPair.key(); LogicalAggregate<Plan> queryAggregate = queryTopPlanAndAggPair.value(); LogicalAggregate<Plan> viewAggregate = viewTopPlanAndAggPair.value(); - Set<Expression> queryGroupShuttledExpression = new HashSet<>(); - for (Expression queryExpression : ExpressionUtils.shuttleExpressionWithLineage( - queryAggregate.getGroupByExpressions(), queryTopPlan, queryStructInfo.getTableBitSet())) { - queryGroupShuttledExpression.add(queryExpression); - } + Set<Expression> queryGroupShuttledExpression = new HashSet<>(ExpressionUtils.shuttleExpressionWithLineage( + queryAggregate.getGroupByExpressions(), queryTopPlan, queryStructInfo.getTableBitSet())); // try to eliminate group by dimension by function dependency if group by expression is not in query Map<Expression, Expression> viewShuttledExpressionQueryBasedToGroupByExpressionMap = new HashMap<>(); @@ -355,22 +361,112 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate viewGroupExpressionQueryBased ); } - if (queryGroupShuttledExpression.equals(viewShuttledExpressionQueryBasedToGroupByExpressionMap.values())) { + if (queryGroupShuttledExpression.equals(viewShuttledExpressionQueryBasedToGroupByExpressionMap.keySet())) { // return true, if equals directly return true; } + + boolean isGroupByEquals = false; + // check is equals by group by eliminate + isGroupByEquals |= isGroupByEqualsAfterGroupByEliminate(queryGroupShuttledExpression, + viewShuttledExpressionQueryBasedToGroupByExpressionMap, + groupByExpressionToViewShuttledExpressionQueryBasedMap, + viewAggregate, + cascadesContext); + // check is equals by equal filter eliminate + Optional<LogicalFilter<Plan>> filterOptional = tempRewrittenPlan.collectFirst(LogicalFilter.class::isInstance); + if (!filterOptional.isPresent()) { + return isGroupByEquals; + } + isGroupByEquals |= isGroupByEqualsAfterEqualFilterEliminate( + (LogicalPlan) tempRewrittenPlan, + queryGroupShuttledExpression, + viewShuttledExpressionQueryBasedToGroupByExpressionMap, + materializationContext); + return isGroupByEquals; + } + + /** + * Check group by is equals by equal filter eliminate + * For example query is select a, b, c from t1 where a = 1 and d = 'xx' group by a, b, c; + * mv is select a, b, c, d from t1 group by a, b, c, d; + * the group by expression between query and view is equals after equal filter eliminate + * should not aggregate roll up + * */ + private static boolean isGroupByEqualsAfterEqualFilterEliminate( + LogicalPlan tempRewrittenPlan, + Set<Expression> queryGroupShuttledExpression, + Map<Expression, Expression> viewShuttledExprQueryBasedToViewGroupByExprMap, + MaterializationContext materializationContext) { + + Map<Expression, Expression> viewShuttledExprToScanExprMapping = + materializationContext.getShuttledExprToScanExprMapping().flattenMap().get(0); + Set<Expression> viewShuttledExprQueryBasedSet = viewShuttledExprQueryBasedToViewGroupByExprMap.keySet(); + // view group by expr can not cover query group by expr + if (!viewShuttledExprQueryBasedSet.containsAll(queryGroupShuttledExpression)) { + return false; + } + Set<Expression> viewShouldUniformExpressionSet = new HashSet<>(); + // calc the group by expr which is needed to roll up and should be uniform + for (Map.Entry<Expression, Expression> expressionEntry : + viewShuttledExprQueryBasedToViewGroupByExprMap.entrySet()) { + if (queryGroupShuttledExpression.contains(expressionEntry.getKey())) { + // the group expr which query has, do not require uniform + continue; + } + viewShouldUniformExpressionSet.add(expressionEntry.getValue()); + } + + DataTrait dataTrait = tempRewrittenPlan.computeDataTrait(); + for (Expression shouldUniformExpr : viewShouldUniformExpressionSet) { + Expression viewScanExpression = viewShuttledExprToScanExprMapping.get(shouldUniformExpr); + if (viewScanExpression == null) { + return false; + } + if (!(viewScanExpression instanceof Slot)) { + return false; + } + if (!dataTrait.isUniform((Slot) viewScanExpression)) { + return false; + } + } + return true; + } + + /** + * Check group by is equal or not after group by eliminate by functional dependency + * Such as query group by expression is (l_orderdate#1, l_supperkey#2) + * materialized view is group by expression is (l_orderdate#4, l_supperkey#5, l_partkey#6) + * materialized view expression mapping is + * {l_orderdate#4:l_orderdate#10, l_supperkey#5:l_supperkey#11, l_partkey#6:l_partkey#12} + * 1. viewShuttledExpressionQueryBasedToGroupByExpressionMap + * is {l_orderdate#1:l_orderdate#10, l_supperkey#2:l_supperkey#11} + * groupByExpressionToViewShuttledExpressionQueryBasedMap + * is {l_orderdate#10:l_orderdate#1, l_supperkey#11:l_supperkey#2:} + * 2. construct projects query used by view group expressions + * projects (l_orderdate#10, l_supperkey#11) + * 3. try to eliminate materialized view group expression + * projects (l_orderdate#10, l_supperkey#11) + * viewAggregate + * 4. check the viewAggregate group by expression is equals queryAggregate expression or not + */ + private static boolean isGroupByEqualsAfterGroupByEliminate(Set<Expression> queryGroupShuttledExpression, + Map<Expression, Expression> viewShuttledExpressionQueryBasedToGroupByExpressionMap, + Map<Expression, Expression> groupByExpressionToViewShuttledExpressionQueryBasedMap, + LogicalAggregate<Plan> viewAggregate, + CascadesContext cascadesContext) { List<NamedExpression> projects = new ArrayList<>(); + // construct projects query used by view group expressions for (Expression expression : queryGroupShuttledExpression) { - if (!viewShuttledExpressionQueryBasedToGroupByExpressionMap.containsKey(expression)) { - // query group expression is not in view group by expression + Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression); + if (chosenExpression == null) { return false; } - Expression chosenExpression = viewShuttledExpressionQueryBasedToGroupByExpressionMap.get(expression); projects.add(chosenExpression instanceof NamedExpression ? (NamedExpression) chosenExpression : new Alias(chosenExpression)); } LogicalProject<LogicalAggregate<Plan>> project = new LogicalProject<>(projects, viewAggregate); - // try to eliminate group by expression which is not in query group by expression + // try to eliminate view group by expression which is not in query group by expression Plan rewrittenPlan = MaterializedViewUtils.rewriteByRules(cascadesContext, childContext -> { Rewriter.getCteChildrenRewriter(childContext, @@ -383,20 +479,21 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate if (!aggreagateOptional.isPresent()) { return false; } + // check result after view group by eliminate by functional dependency List<Expression> viewEliminatedGroupByExpressions = aggreagateOptional.get().getGroupByExpressions(); if (viewEliminatedGroupByExpressions.size() != queryGroupShuttledExpression.size()) { return false; } Set<Expression> viewGroupShuttledExpressionQueryBased = new HashSet<>(); for (Expression viewExpression : aggreagateOptional.get().getGroupByExpressions()) { - if (!groupByExpressionToViewShuttledExpressionQueryBasedMap.containsKey(viewExpression)) { + Expression viewExpressionQueryBased = + groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression); + if (viewExpressionQueryBased == null) { return false; } - viewGroupShuttledExpressionQueryBased.add( - groupByExpressionToViewShuttledExpressionQueryBasedMap.get(viewExpression)); + viewGroupShuttledExpressionQueryBased.add(viewExpressionQueryBased); } - return materializationContext instanceof SyncMaterializationContext ? false - : queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased); + return queryGroupShuttledExpression.equals(viewGroupShuttledExpressionQueryBased); } /** diff --git a/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out new file mode 100644 index 00000000000..298d8191964 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out @@ -0,0 +1,125 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query1_0_before -- +2023-12-10 46.00 33.50 2 0 + +-- !shape1_0_after -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------filter((mv1_0.o_custkey = 1) and (mv1_0.o_orderkey = 3)) +--------PhysicalOlapScan[mv1_0] + +-- !query1_0_after -- +2023-12-10 46.00 33.50 2 0 + +-- !query2_0_before -- +2 2 2 1.0 1.0 1 1 + +-- !shape2_0_after -- +PhysicalResultSink +--hashAgg[DISTINCT_LOCAL] +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------filter((mv2_0.o_orderkey = 1) and (mv2_0.o_orderstatus = 'o')) +----------PhysicalOlapScan[mv2_0] + +-- !query2_0_after -- +2 2 2 1.0 1.0 1 1 + +-- !query3_0_before -- +yy 11.50 11.50 11.50 1 + +-- !shape3_0_after -- +PhysicalResultSink +--filter((mv3_0.o_orderdate = '2023-12-09') and (mv3_0.o_shippriority = 1)) +----PhysicalOlapScan[mv3_0] + +-- !query3_0_after -- +yy 11.50 11.50 11.50 1 + +-- !query3_1_before -- +mi 56.20 56.20 56.20 1 + +-- !shape3_1_after -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------filter((orders.o_orderdate = '2023-12-12') and (orders.o_shippriority = 2) and (orders.o_totalprice = 56.20)) +--------PhysicalOlapScan[orders] + +-- !query3_1_after -- +mi 56.20 56.20 56.20 1 + +-- !query4_0_before -- +yy 11.50 11.50 11.50 1 + +-- !query4_0_after -- +yy 11.50 11.50 11.50 1 + +-- !query5_0_before -- +3 2023-12-12 57.40 56.20 2 0 + +-- !shape5_0_after -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------filter((mv5_0.l_partkey = 2) and (mv5_0.l_shipdate = '2023-12-12')) +--------PhysicalOlapScan[mv5_0] + +-- !query5_0_after -- +3 2023-12-12 57.40 56.20 2 0 + +-- !query6_0_before -- +2 2 2 2 1.0 1.0 1 1 + +-- !shape6_0_after -- +PhysicalResultSink +--hashAgg[DISTINCT_LOCAL] +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------filter((mv6_0.o_orderkey = 1) and (mv6_0.o_orderstatus = 'o')) +----------PhysicalOlapScan[mv6_0] + +-- !query6_0_after -- +2 2 2 2 1.0 1.0 1 1 + +-- !query7_0_before -- +yy 4 11.50 11.50 11.50 1 + +-- !shape7_0_after -- +PhysicalResultSink +--filter((mv7_0.o_orderdate = '2023-12-09') and (mv7_0.o_shippriority = 1)) +----PhysicalOlapScan[mv7_0] + +-- !query7_0_after -- +yy 4 11.50 11.50 11.50 1 + +-- !query7_1_before -- +yy 4 11.50 11.50 11.50 1 + +-- !shape7_1_after -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------hashJoin[INNER_JOIN] hashCondition=((t1.l_orderkey = orders.o_orderkey) and (t1.l_shipdate = orders.o_orderdate)) otherCondition=() +--------filter((t1.l_shipdate = '2023-12-09')) +----------PhysicalOlapScan[lineitem] +--------filter((orders.o_orderdate = '2023-12-09') and (orders.o_shippriority = 1) and (orders.o_totalprice = 11.50)) +----------PhysicalOlapScan[orders] + +-- !query7_1_after -- +yy 4 11.50 11.50 11.50 1 + +-- !query8_0_before -- +yy 4 11.50 11.50 11.50 1 + +-- !shape8_0_after -- +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------filter((mv8_0.o_orderdate = '2023-12-09')) +--------PhysicalOlapScan[mv8_0] + +-- !query8_0_after -- +yy 4 11.50 11.50 11.50 1 + diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy new file mode 100644 index 00000000000..3c789acca18 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy @@ -0,0 +1,485 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("agg_optimize_when_uniform") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql """set enable_agg_state=true""" + + sql """ + drop table if exists orders + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) ( + PARTITION `day_2` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_3` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_4` VALUES LESS THAN ("2023-12-30") + ) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + drop table if exists lineitem + """ + + sql""" + CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey INTEGER NOT NULL, + l_partkey INTEGER NOT NULL, + l_suppkey INTEGER NOT NULL, + l_linenumber INTEGER NOT NULL, + l_quantity DECIMALV3(15,2) NOT NULL, + l_extendedprice DECIMALV3(15,2) NOT NULL, + l_discount DECIMALV3(15,2) NOT NULL, + l_tax DECIMALV3(15,2) NOT NULL, + l_returnflag CHAR(1) NOT NULL, + l_linestatus CHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct CHAR(25) NOT NULL, + l_shipmode CHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) ( + PARTITION `day_1` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_2` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_3` VALUES LESS THAN ("2023-12-30")) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + drop table if exists partsupp + """ + + sql """ + CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey INTEGER NOT NULL, + ps_suppkey INTEGER NOT NULL, + ps_availqty INTEGER NOT NULL, + ps_supplycost DECIMALV3(15,2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + ) + DUPLICATE KEY(ps_partkey, ps_suppkey) + DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ insert into lineitem values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx'); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + + sql """ + insert into partsupp values + (2, 3, 9, 10.01, 'supply1'), + (2, 3, 10, 11.01, 'supply2'); + """ + + // single table + // filter cover all roll up dimensions and contains agg function in mapping, combinator handler + def mv1_0 = """ + select o_orderkey, o_custkey, o_orderdate, + sum_union(sum_state(o_totalprice)), + max_union(max_state(o_totalprice)), + count(*) as count_all, + bitmap_union(to_bitmap(case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end)) as bitmap_union_basic + from orders + group by + o_orderkey, + o_custkey, + o_orderdate; + """ + def query1_0 = + """ + select o_orderdate, + sum(o_totalprice), + max(o_totalprice), + count(*), + count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end) + from orders + where o_orderkey = 3 and o_custkey = 1 + group by + o_orderdate; + """ + order_qt_query1_0_before "${query1_0}" + check_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") + qt_shape1_0_after """explain shape plan ${query1_0}""" + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + + // filter cover all roll up dimensions and contains agg function in distinct handler + def mv2_0 = """ + select + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey), + o_orderkey + from orders + group by + o_orderkey, + o_orderstatus, + o_shippriority, + bin(o_orderkey); + """ + def query2_0 = + """ + select + count(o_totalprice), + max(distinct o_shippriority + o_orderkey), + min(distinct o_shippriority + o_orderkey), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_shippriority, + bin(o_orderkey) + from orders + where o_orderkey = 1 and o_orderstatus = 'o' + group by + o_shippriority, + bin(o_orderkey); + """ + order_qt_query2_0_before "${query2_0}" + check_mv_rewrite_success(db, mv2_0, query2_0, "mv2_0") + qt_shape2_0_after """explain shape plan ${query2_0}""" + order_qt_query2_0_after "${query2_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" + + // filter cover all roll up dimensions and only contains agg function in direct handler + def mv3_0 = """ + select o_orderdate, o_shippriority, o_comment, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from orders + group by + o_orderdate, + o_shippriority, + o_comment; + """ + def query3_0 = + """ + select o_comment, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from orders + where o_orderdate = '2023-12-09' and o_shippriority = 1 + group by + o_comment; + """ + order_qt_query3_0_before "${query3_0}" + check_mv_rewrite_success(db, mv3_0, query3_0, "mv3_0") + // query success and doesn't add aggregate + qt_shape3_0_after """explain shape plan ${query3_0}""" + order_qt_query3_0_after "${query3_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_0""" + + + def mv3_1 = """ + select o_orderdate, o_shippriority, o_comment, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from orders + group by + o_orderdate, + o_shippriority, + o_comment; + """ + def query3_1 = + """ + select o_comment, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from orders + where o_orderdate = '2023-12-12' and o_shippriority = 2 and o_totalprice = 56.2 + group by + o_comment; + """ + order_qt_query3_1_before "${query3_1}" + // query where has a column not in agg output + check_mv_rewrite_fail(db, mv3_1, query3_1, "mv3_1") + qt_shape3_1_after """explain shape plan ${query3_1}""" + order_qt_query3_1_after "${query3_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_1""" + + + // filter does not cover all roll up dimensions + def mv4_0 = """ + select o_orderdate, o_shippriority, o_comment, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from orders + group by + o_orderdate, + o_shippriority, + o_comment; + """ + def query4_0 = + """ + select o_comment, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from orders + where o_orderdate = '2023-12-09' + group by + o_comment; + """ + order_qt_query4_0_before "${query4_0}" + // query success but add agg + check_mv_rewrite_success_without_check_chosen(db, mv4_0, query4_0, "mv4_0") + order_qt_query4_0_after "${query4_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv4_0""" + + + // multi table + // filter cover all roll up dimensions and contains agg function in mapping, combinator handler + def mv5_0 = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey, + sum_union(sum_state(o_totalprice)), + max_union(max_state(o_totalprice)), + count(*) as count_all, + bitmap_union(to_bitmap(case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end)) as bitmap_union_basic + from lineitem + left join orders on lineitem.l_orderkey = orders.o_orderkey and l_shipdate = o_orderdate + group by + l_shipdate, + o_orderdate, + l_partkey, + l_suppkey; + """ + def query5_0 = + """ + select t1.l_suppkey, o_orderdate, + sum(o_totalprice), + max(o_totalprice), + count(*), + count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end) + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + where l_partkey = 2 and l_shipdate = '2023-12-12' + group by + o_orderdate, + l_suppkey; + """ + order_qt_query5_0_before "${query5_0}" + check_mv_rewrite_success(db, mv5_0, query5_0, "mv5_0") + qt_shape5_0_after """explain shape plan ${query5_0}""" + order_qt_query5_0_after "${query5_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv5_0""" + + // filter cover all roll up dimensions and contains agg function in distinct handler + def mv6_0 = """ + select + l_partkey, + count(o_totalprice), + o_shippriority, + o_orderstatus, + bin(o_orderkey), + o_orderkey + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + group by + o_orderkey, + o_orderstatus, + o_shippriority, + l_partkey, + bin(o_orderkey); + """ + def query6_0 = + """ + select + l_partkey, + count(o_totalprice), + max(distinct o_shippriority + o_orderkey), + min(distinct o_shippriority + o_orderkey), + avg(distinct o_shippriority), + sum(distinct o_shippriority) / count(distinct o_shippriority), + o_shippriority, + bin(o_orderkey) + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + where o_orderkey = 1 and o_orderstatus = 'o' + group by + l_partkey, + o_shippriority, + bin(o_orderkey); + """ + order_qt_query6_0_before "${query6_0}" + check_mv_rewrite_success(db, mv6_0, query6_0, "mv6_0") + qt_shape6_0_after """explain shape plan ${query6_0}""" + order_qt_query6_0_after "${query6_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv6_0""" + + + // filter cover all roll up dimensions and only contains agg function in direct handler + def mv7_0 = """ + select o_orderdate, o_shippriority, o_comment, l_partkey, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + group by + l_partkey, + o_orderdate, + o_shippriority, + o_comment; + """ + def query7_0 = + """ + select o_comment, l_partkey, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + where o_orderdate = '2023-12-09' and o_shippriority = 1 + group by + l_partkey, + o_comment; + """ + order_qt_query7_0_before "${query7_0}" + check_mv_rewrite_success(db, mv7_0, query7_0, "mv7_0") + // query success and doesn't add aggregate + qt_shape7_0_after """explain shape plan ${query7_0}""" + order_qt_query7_0_after "${query7_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv7_0""" + + + def mv7_1 = """ + select o_orderdate, o_shippriority, o_comment, l_partkey, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + group by + l_partkey, + o_orderdate, + o_shippriority, + o_comment; + """ + def query7_1 = + """ + select o_comment, l_partkey, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + where o_orderdate = '2023-12-09' and o_shippriority = 1 and o_totalprice = 11.5 + group by + l_partkey, + o_comment; + """ + order_qt_query7_1_before "${query7_1}" + // query where has a column not in agg output + check_mv_rewrite_fail(db, mv7_1, query7_1, "mv7_1") + qt_shape7_1_after """explain shape plan ${query7_1}""" + order_qt_query7_1_after "${query7_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv7_1""" + + // filter does not cover all roll up dimensions + def mv8_0 = """ + select o_orderdate, o_shippriority, o_comment, l_partkey, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + group by + l_partkey, + o_orderdate, + o_shippriority, + o_comment; + """ + def query8_0 = + """ + select o_comment, l_partkey, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from lineitem t1 + left join orders on t1.l_orderkey = orders.o_orderkey and t1.l_shipdate = o_orderdate + where o_orderdate = '2023-12-09' + group by + l_partkey, + o_comment; + """ + order_qt_query8_0_before "${query8_0}" + // query success but add agg + check_mv_rewrite_success(db, mv8_0, query8_0, "mv8_0") + qt_shape8_0_after """explain shape plan ${query8_0}""" + order_qt_query8_0_after "${query8_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv8_0""" +} diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy index 87c582f80c8..a76744ce962 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy @@ -148,28 +148,6 @@ suite("aggregate_with_roll_up") { } } - def check_rewrite_but_not_chose = { mv_sql, query_sql, mv_name -> - - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql""" - CREATE MATERIALIZED VIEW ${mv_name} - BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ('replication_num' = '1') - AS ${mv_sql} - """ - - def job_name = getJobName(db, mv_name); - waitingMTMVTaskFinished(job_name) - explain { - sql("${query_sql}") - check {result -> - def splitResult = result.split("MaterializedViewRewriteFail") - splitResult.length == 2 ? splitResult[0].contains(mv_name) : false - } - } - } - // multi table // filter inside + left + use roll up dimension def mv13_0 = @@ -1036,9 +1014,7 @@ suite("aggregate_with_roll_up") { o_comment; """ order_qt_query1_1_before "${query1_1}" - // rewrite success, but not chose - // because data volume is small and mv plan is almost same to query plan - check_rewrite_but_not_chose(mv1_1, query1_1, "mv1_1") + check_mv_rewrite_success(db, mv1_1, query1_1, "mv1_1") order_qt_query1_1_after "${query1_1}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_1""" @@ -1070,9 +1046,7 @@ suite("aggregate_with_roll_up") { "o_comment " order_qt_query2_0_before "${query2_0}" - // rewrite success, but not chose - // because data volume is small and mv plan is almost same to query plan - check_rewrite_but_not_chose(mv2_0, query2_0, "mv2_0") + check_mv_rewrite_success(db, mv2_0, query2_0, "mv2_0") order_qt_query2_0_after "${query2_0}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.groovy index d9607a98998..801bdee1ef7 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.groovy @@ -133,31 +133,33 @@ suite("aggregate_without_roll_up") { // single table // with filter - def mv1_0 = "select o_shippriority, o_comment, " + - "sum(o_totalprice) as sum_total, " + - "max(o_totalprice) as max_total, " + - "min(o_totalprice) as min_total, " + - "count(*) as count_all, " + - "count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end), " + - "count(distinct case when O_SHIPPRIORITY > 2 and o_orderkey IN (2) then o_custkey else null end) as cnt_2 " + - "from orders " + - "group by " + - "o_shippriority, " + - "o_comment " + def mv1_0 = """ + select o_shippriority, o_comment, + sum(o_totalprice) as sum_total, + max(o_totalprice) as max_total, + min(o_totalprice) as min_total, + count(*) as count_all, + count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end), + count(distinct case when O_SHIPPRIORITY > 2 and o_orderkey IN (2) then o_custkey else null end) as cnt_2 + from orders + group by + o_shippriority, + o_comment; + """ def query1_0 = """ - select o_shippriority, o_comment, - count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end) as cnt_1, - count(distinct case when O_SHIPPRIORITY > 2 and o_orderkey IN (2) then o_custkey else null end) as cnt_2, - sum(o_totalprice), - max(o_totalprice), - min(o_totalprice), - count(*) - from orders - where o_shippriority in (1, 2) - group by - o_shippriority, - o_comment; - """ + select o_shippriority, o_comment, + count(distinct case when o_shippriority > 1 and o_orderkey IN (1, 3) then o_custkey else null end) as cnt_1, + count(distinct case when O_SHIPPRIORITY > 2 and o_orderkey IN (2) then o_custkey else null end) as cnt_2, + sum(o_totalprice), + max(o_totalprice), + min(o_totalprice), + count(*) + from orders + where o_shippriority in (1, 2) + group by + o_shippriority, + o_comment; + """ order_qt_query1_0_before "${query1_0}" check_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") order_qt_query1_0_after "${query1_0}" @@ -245,27 +247,29 @@ suite("aggregate_without_roll_up") { "group by " + "O_SHIPPRIORITY, " + "O_COMMENT " - def query1_2 = "select O_SHIPPRIORITY, O_COMMENT, " + - "count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (1, 3) then O_ORDERSTATUS else null end) as filter_cnt_1, " + - "count(distinct case when O_SHIPPRIORITY > 2 and O_ORDERKEY IN (2) then O_ORDERSTATUS else null end) as filter_cnt_2, " + - "count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (3, 4) then O_ORDERSTATUS else null end) as filter_cnt_3, " + - "count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (2, 3) then O_ORDERSTATUS else null end) as filter_cnt_5, " + - "count(distinct case when O_SHIPPRIORITY > 2 and O_ORDERKEY IN (7, 9) then O_ORDERSTATUS else null end) as filter_cnt_6, " + - "count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (11, 13) then O_ORDERSTATUS else null end) as filter_cnt_8, " + - "count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (12, 11) then O_ORDERSTATUS else null end) as filter_cnt_9, " + - "count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (11, 12) then O_ORDERSTATUS else null end) as filter_cnt_11, " + - "count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (3, 6) then O_ORDERSTATUS else null end) as filter_cnt_12, " + - "count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (16, 19) then O_ORDERSTATUS else null end) as filter_cnt_13, " + - "count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (15, 19) then O_ORDERSTATUS else null end) as filter_cnt_15, " + - "count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (13, 21) then O_ORDERSTATUS else null end) as filter_cnt_16, " + - "count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (16, 25) then O_ORDERSTATUS else null end) as filter_cnt_18, " + - "count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (19, 3) then O_ORDERSTATUS else null end) as filter_cnt_19, " + - "count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (1, 20) then O_ORDERSTATUS else null end) as filter_cnt_20 " + - "from orders " + - "where O_ORDERDATE < '2023-12-30' and O_ORDERDATE > '2023-12-01'" + - "group by " + - "O_SHIPPRIORITY, " + - "O_COMMENT " + def query1_2 = """ + select O_SHIPPRIORITY, O_COMMENT, + count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (1, 3) then O_ORDERSTATUS else null end) as filter_cnt_1, + count(distinct case when O_SHIPPRIORITY > 2 and O_ORDERKEY IN (2) then O_ORDERSTATUS else null end) as filter_cnt_2, + count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (3, 4) then O_ORDERSTATUS else null end) as filter_cnt_3, + count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (2, 3) then O_ORDERSTATUS else null end) as filter_cnt_5, + count(distinct case when O_SHIPPRIORITY > 2 and O_ORDERKEY IN (7, 9) then O_ORDERSTATUS else null end) as filter_cnt_6, + count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (11, 13) then O_ORDERSTATUS else null end) as filter_cnt_8, + count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (12, 11) then O_ORDERSTATUS else null end) as filter_cnt_9, + count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (11, 12) then O_ORDERSTATUS else null end) as filter_cnt_11, + count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (3, 6) then O_ORDERSTATUS else null end) as filter_cnt_12, + count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (16, 19) then O_ORDERSTATUS else null end) as filter_cnt_13, + count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (15, 19) then O_ORDERSTATUS else null end) as filter_cnt_15, + count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (13, 21) then O_ORDERSTATUS else null end) as filter_cnt_16, + count(distinct case when O_SHIPPRIORITY > 3 and O_ORDERKEY IN (16, 25) then O_ORDERSTATUS else null end) as filter_cnt_18, + count(distinct case when O_SHIPPRIORITY > 4 and O_ORDERKEY IN (19, 3) then O_ORDERSTATUS else null end) as filter_cnt_19, + count(distinct case when O_SHIPPRIORITY > 1 and O_ORDERKEY IN (1, 20) then O_ORDERSTATUS else null end) as filter_cnt_20 + from orders + where O_ORDERDATE < '2023-12-30' and O_ORDERDATE > '2023-12-01' + group by + O_SHIPPRIORITY, + O_COMMENT; + """ order_qt_query1_2_before "${query1_2}" check_mv_rewrite_success(db, mv1_2, query1_2, "mv1_2") order_qt_query1_2_after "${query1_2}" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org