This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 2950631f075 [fix](Nereids) fix four phase aggregation compute wrong result (#36128) 2950631f075 is described below commit 2950631f0750e8134415d07373c299aa2ae68f30 Author: 924060929 <924060...@qq.com> AuthorDate: Tue Jun 11 20:38:20 2024 +0800 [fix](Nereids) fix four phase aggregation compute wrong result (#36128) fix a bug introduced by #35871 Backend can process distinct aggregate by two method: 1. use group by key to process distinct, I call it: external distinct of aggregate function 2. use `multi_distinct_xxx` to process distinct, in the corresponding aggregate function, I call it: internal distinct of aggregate function This sql should process distinct by the multi distinct function when use four phase aggregation: The first two phase use group by key: id, field1 and field2. The last two phase use group by key: id. This group by key can not process distinct in the aggregate function, so we should use internal distinct method in four phase aggregate function, if contains multi distinct column(e.g. field1 and field2) ```sql select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/ id, count(distinct field1), count(distinct field2) from agg_4_phase_tbl2 group by id ``` --- .../rules/implementation/AggregateStrategies.java | 46 ++++++++++++++++++---- .../data/nereids_syntax_p0/agg_4_phase.out | 12 +++++- .../suites/nereids_syntax_p0/agg_4_phase.groovy | 27 ++++++++++++- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index eae17b1a99e..d15de080913 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -370,6 +370,12 @@ public class AggregateStrategies implements ImplementationRuleFactory { .addAll(agg.getDistinctArguments()) .build().size() > agg.getGroupByExpressions().size() ) + .when(agg -> { + if (agg.getDistinctArguments().size() == 1) { + return true; + } + return couldConvertToMulti(agg); + }) .thenApplyMulti(ctx -> { Function<List<Expression>, RequireProperties> secondPhaseRequireGroupByAndDistinctHash = groupByAndDistinct -> RequireProperties.of( @@ -1849,6 +1855,8 @@ public class AggregateStrategies implements ImplementationRuleFactory { bufferToBufferParam, false, logicalAgg.getLogicalProperties(), secondPhaseRequire, anyLocalAgg); + boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1; + // phase 3 AggregateParam distinctLocalParam = new AggregateParam( AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned); @@ -1867,11 +1875,25 @@ public class AggregateStrategies implements ImplementationRuleFactory { || aggregateFunction.getDistinctArguments().size() == 1, "cannot process more than one child in aggregate distinct function: " + aggregateFunction); - AggregateFunction nonDistinct = aggregateFunction - .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); - AggregateExpression nonDistinctAggExpr = new AggregateExpression(nonDistinct, - distinctLocalParam, aggregateFunction); - return nonDistinctAggExpr; + + AggregateFunction newDistinct; + if (shouldDistinctAfterPhase2) { + // we use aggregate function to process distinct, + // so need to change to multi distinct function + newDistinct = tryConvertToMultiDistinct( + aggregateFunction.withDistinctAndChildren( + true, ImmutableList.copyOf(aggChild)) + ); + } else { + // we use group by to process distinct, + // so no distinct param in the aggregate function + newDistinct = aggregateFunction.withDistinctAndChildren( + false, ImmutableList.copyOf(aggChild)); + } + + AggregateExpression newDistinctAggExpr = new AggregateExpression( + newDistinct, distinctLocalParam, newDistinct); + return newDistinctAggExpr; } else { needUpdateSlot.add(aggregateFunction); Alias alias = nonDistinctAggFunctionToAliasPhase2.get(expr); @@ -1908,11 +1930,19 @@ public class AggregateStrategies implements ImplementationRuleFactory { || aggregateFunction.getDistinctArguments().size() == 1, "cannot process more than one child in aggregate distinct function: " + aggregateFunction); - AggregateFunction nonDistinct = aggregateFunction - .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); + AggregateFunction newDistinct; + if (shouldDistinctAfterPhase2) { + newDistinct = tryConvertToMultiDistinct( + aggregateFunction.withDistinctAndChildren( + true, ImmutableList.copyOf(aggChild)) + ); + } else { + newDistinct = aggregateFunction + .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); + } int idx = logicalAgg.getOutputExpressions().indexOf(outputExpr); Alias localDistinctAlias = (Alias) (localDistinctOutput.get(idx)); - return new AggregateExpression(nonDistinct, + return new AggregateExpression(newDistinct, distinctGlobalParam, localDistinctAlias.toSlot()); } else { Alias alias = nonDistinctAggFunctionToAliasPhase3.get(expr); diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out b/regression-test/data/nereids_syntax_p0/agg_4_phase.out index 5c5dde6f855..a939738ea18 100644 --- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out +++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out @@ -1,4 +1,14 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !4phase -- -3 160 +3 + +-- !phase4_multi_distinct -- +1 -10,-10 1 a 1 +2 -4,-4 1 b 1 +3 -4 1 f 1 + +-- !phase4_single_distinct -- +1 -10,-10 1 +2 -4,-4 1 +3 -4 1 diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy index d3b418660ab..ed69d0018c9 100644 --- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy +++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy @@ -47,7 +47,7 @@ suite("agg_4_phase") { count(distinct id) from agg_4_phase_tbl; """ - explain{ + explain { sql(test_sql) contains ":VAGGREGATE (merge finalize)" contains ":VEXCHANGE" @@ -60,4 +60,29 @@ suite("agg_4_phase") { sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;""" sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/ GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;""" + + + sql "drop table if exists agg_4_phase_tbl2" + sql "create table agg_4_phase_tbl2(id int, field1 int, field2 varchar(255)) properties('replication_num'='1');" + sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2, -4, null), (2, -4, 'b'), (3, -4, 'f');\n" + + qt_phase4_multi_distinct """ + select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/ + id, + group_concat(cast(field1 as varchar), ','), + count(distinct field1), + group_concat(cast(field2 as varchar), ','), + count(distinct field2) + from agg_4_phase_tbl2 + group by id + order by id""" + + qt_phase4_single_distinct """ + select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/ + id, + group_concat(cast(field1 as varchar), ','), + count(distinct field1) + from agg_4_phase_tbl2 + group by id + order by id""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org