This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new acbfcf7ad92 [fix](Nereids) fix four phase aggregation compute wrong result (#36131) acbfcf7ad92 is described below commit acbfcf7ad92433e3fcbabe8f5fd59aa11d385392 Author: 924060929 <924060...@qq.com> AuthorDate: Tue Jun 11 20:40:18 2024 +0800 [fix](Nereids) fix four phase aggregation compute wrong result (#36131) cherry pick from #36128 --- .../rules/implementation/AggregateStrategies.java | 46 ++++++++++++++++++---- .../data/nereids_syntax_p0/agg_4_phase.out | 12 +++++- .../suites/nereids_syntax_p0/agg_4_phase.groovy | 27 ++++++++++++- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index 9cb7b4d84a4..96b7507f3dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -360,6 +360,12 @@ public class AggregateStrategies implements ImplementationRuleFactory { .addAll(agg.getDistinctArguments()) .build().size() > agg.getGroupByExpressions().size() ) + .when(agg -> { + if (agg.getDistinctArguments().size() == 1) { + return true; + } + return couldConvertToMulti(agg); + }) .thenApplyMulti(ctx -> { Function<List<Expression>, RequireProperties> secondPhaseRequireGroupByAndDistinctHash = groupByAndDistinct -> RequireProperties.of( @@ -1808,6 +1814,8 @@ public class AggregateStrategies implements ImplementationRuleFactory { bufferToBufferParam, false, logicalAgg.getLogicalProperties(), secondPhaseRequire, anyLocalAgg); + boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1; + // phase 3 AggregateParam distinctLocalParam = new AggregateParam( AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned); @@ -1826,11 +1834,25 @@ public class AggregateStrategies implements ImplementationRuleFactory { || aggregateFunction.getDistinctArguments().size() == 1, "cannot process more than one child in aggregate distinct function: " + aggregateFunction); - AggregateFunction nonDistinct = aggregateFunction - .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); - AggregateExpression nonDistinctAggExpr = new AggregateExpression(nonDistinct, - distinctLocalParam, aggregateFunction); - return nonDistinctAggExpr; + + AggregateFunction newDistinct; + if (shouldDistinctAfterPhase2) { + // we use aggregate function to process distinct, + // so need to change to multi distinct function + newDistinct = tryConvertToMultiDistinct( + aggregateFunction.withDistinctAndChildren( + true, ImmutableList.copyOf(aggChild)) + ); + } else { + // we use group by to process distinct, + // so no distinct param in the aggregate function + newDistinct = aggregateFunction.withDistinctAndChildren( + false, ImmutableList.copyOf(aggChild)); + } + + AggregateExpression newDistinctAggExpr = new AggregateExpression( + newDistinct, distinctLocalParam, newDistinct); + return newDistinctAggExpr; } else { needUpdateSlot.add(aggregateFunction); Alias alias = nonDistinctAggFunctionToAliasPhase2.get(expr); @@ -1867,11 +1889,19 @@ public class AggregateStrategies implements ImplementationRuleFactory { || aggregateFunction.getDistinctArguments().size() == 1, "cannot process more than one child in aggregate distinct function: " + aggregateFunction); - AggregateFunction nonDistinct = aggregateFunction - .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); + AggregateFunction newDistinct; + if (shouldDistinctAfterPhase2) { + newDistinct = tryConvertToMultiDistinct( + aggregateFunction.withDistinctAndChildren( + true, ImmutableList.copyOf(aggChild)) + ); + } else { + newDistinct = aggregateFunction + .withDistinctAndChildren(false, ImmutableList.copyOf(aggChild)); + } int idx = logicalAgg.getOutputExpressions().indexOf(outputExpr); Alias localDistinctAlias = (Alias) (localDistinctOutput.get(idx)); - return new AggregateExpression(nonDistinct, + return new AggregateExpression(newDistinct, distinctGlobalParam, localDistinctAlias.toSlot()); } else { Alias alias = nonDistinctAggFunctionToAliasPhase3.get(expr); diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out b/regression-test/data/nereids_syntax_p0/agg_4_phase.out index 5c5dde6f855..a939738ea18 100644 --- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out +++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out @@ -1,4 +1,14 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !4phase -- -3 160 +3 + +-- !phase4_multi_distinct -- +1 -10,-10 1 a 1 +2 -4,-4 1 b 1 +3 -4 1 f 1 + +-- !phase4_single_distinct -- +1 -10,-10 1 +2 -4,-4 1 +3 -4 1 diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy index d3b418660ab..ed69d0018c9 100644 --- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy +++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy @@ -47,7 +47,7 @@ suite("agg_4_phase") { count(distinct id) from agg_4_phase_tbl; """ - explain{ + explain { sql(test_sql) contains ":VAGGREGATE (merge finalize)" contains ":VEXCHANGE" @@ -60,4 +60,29 @@ suite("agg_4_phase") { sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;""" sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/ GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;""" + + + sql "drop table if exists agg_4_phase_tbl2" + sql "create table agg_4_phase_tbl2(id int, field1 int, field2 varchar(255)) properties('replication_num'='1');" + sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2, -4, null), (2, -4, 'b'), (3, -4, 'f');\n" + + qt_phase4_multi_distinct """ + select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/ + id, + group_concat(cast(field1 as varchar), ','), + count(distinct field1), + group_concat(cast(field2 as varchar), ','), + count(distinct field2) + from agg_4_phase_tbl2 + group by id + order by id""" + + qt_phase4_single_distinct """ + select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/ + id, + group_concat(cast(field1 as varchar), ','), + count(distinct field1) + from agg_4_phase_tbl2 + group by id + order by id""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org