This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new acbfcf7ad92 [fix](Nereids) fix four phase aggregation compute wrong 
result (#36131)
acbfcf7ad92 is described below

commit acbfcf7ad92433e3fcbabe8f5fd59aa11d385392
Author: 924060929 <924060...@qq.com>
AuthorDate: Tue Jun 11 20:40:18 2024 +0800

    [fix](Nereids) fix four phase aggregation compute wrong result (#36131)
    
    cherry pick from #36128
---
 .../rules/implementation/AggregateStrategies.java  | 46 ++++++++++++++++++----
 .../data/nereids_syntax_p0/agg_4_phase.out         | 12 +++++-
 .../suites/nereids_syntax_p0/agg_4_phase.groovy    | 27 ++++++++++++-
 3 files changed, 75 insertions(+), 10 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 9cb7b4d84a4..96b7507f3dc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -360,6 +360,12 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                             .addAll(agg.getDistinctArguments())
                             .build().size() > 
agg.getGroupByExpressions().size()
                     )
+                    .when(agg -> {
+                        if (agg.getDistinctArguments().size() == 1) {
+                            return true;
+                        }
+                        return couldConvertToMulti(agg);
+                    })
                     .thenApplyMulti(ctx -> {
                         Function<List<Expression>, RequireProperties> 
secondPhaseRequireGroupByAndDistinctHash =
                                 groupByAndDistinct -> RequireProperties.of(
@@ -1808,6 +1814,8 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                 bufferToBufferParam, false, logicalAgg.getLogicalProperties(),
                 secondPhaseRequire, anyLocalAgg);
 
+        boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1;
+
         // phase 3
         AggregateParam distinctLocalParam = new AggregateParam(
                 AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned);
@@ -1826,11 +1834,25 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                                                 || 
aggregateFunction.getDistinctArguments().size() == 1,
                                         "cannot process more than one child in 
aggregate distinct function: "
                                                 + aggregateFunction);
-                                AggregateFunction nonDistinct = 
aggregateFunction
-                                        .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
-                                AggregateExpression nonDistinctAggExpr = new 
AggregateExpression(nonDistinct,
-                                        distinctLocalParam, aggregateFunction);
-                                return nonDistinctAggExpr;
+
+                                AggregateFunction newDistinct;
+                                if (shouldDistinctAfterPhase2) {
+                                    // we use aggregate function to process 
distinct,
+                                    // so need to change to multi distinct 
function
+                                    newDistinct = tryConvertToMultiDistinct(
+                                            
aggregateFunction.withDistinctAndChildren(
+                                                    true, 
ImmutableList.copyOf(aggChild))
+                                    );
+                                } else {
+                                    // we use group by to process distinct,
+                                    // so no distinct param in the aggregate 
function
+                                    newDistinct = 
aggregateFunction.withDistinctAndChildren(
+                                            false, 
ImmutableList.copyOf(aggChild));
+                                }
+
+                                AggregateExpression newDistinctAggExpr = new 
AggregateExpression(
+                                        newDistinct, distinctLocalParam, 
newDistinct);
+                                return newDistinctAggExpr;
                             } else {
                                 needUpdateSlot.add(aggregateFunction);
                                 Alias alias = 
nonDistinctAggFunctionToAliasPhase2.get(expr);
@@ -1867,11 +1889,19 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                                         || 
aggregateFunction.getDistinctArguments().size() == 1,
                                 "cannot process more than one child in 
aggregate distinct function: "
                                         + aggregateFunction);
-                        AggregateFunction nonDistinct = aggregateFunction
-                                .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
+                        AggregateFunction newDistinct;
+                        if (shouldDistinctAfterPhase2) {
+                            newDistinct = tryConvertToMultiDistinct(
+                                    aggregateFunction.withDistinctAndChildren(
+                                            true, 
ImmutableList.copyOf(aggChild))
+                            );
+                        } else {
+                            newDistinct = aggregateFunction
+                                    .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
+                        }
                         int idx = 
logicalAgg.getOutputExpressions().indexOf(outputExpr);
                         Alias localDistinctAlias = (Alias) 
(localDistinctOutput.get(idx));
-                        return new AggregateExpression(nonDistinct,
+                        return new AggregateExpression(newDistinct,
                                 distinctGlobalParam, 
localDistinctAlias.toSlot());
                     } else {
                         Alias alias = 
nonDistinctAggFunctionToAliasPhase3.get(expr);
diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out 
b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
index 5c5dde6f855..a939738ea18 100644
--- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
+++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
@@ -1,4 +1,14 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !4phase --
-3      160
+3
+
+-- !phase4_multi_distinct --
+1      -10,-10 1       a       1
+2      -4,-4   1       b       1
+3      -4      1       f       1
+
+-- !phase4_single_distinct --
+1      -10,-10 1
+2      -4,-4   1
+3      -4      1
 
diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy 
b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
index d3b418660ab..ed69d0018c9 100644
--- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
+++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
@@ -47,7 +47,7 @@ suite("agg_4_phase") {
             count(distinct id)
         from agg_4_phase_tbl;
         """
-    explain{
+    explain {
         sql(test_sql)
         contains ":VAGGREGATE (merge finalize)"
         contains ":VEXCHANGE"
@@ -60,4 +60,29 @@ suite("agg_4_phase") {
     sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;"""
 
     sql """select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
 GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;"""
+
+
+    sql "drop table if exists agg_4_phase_tbl2"
+    sql "create table agg_4_phase_tbl2(id int, field1 int, field2 
varchar(255)) properties('replication_num'='1');"
+    sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2, 
-4, null), (2, -4, 'b'), (3, -4, 'f');\n"
+
+    qt_phase4_multi_distinct """
+        select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+            id,
+            group_concat(cast(field1 as varchar), ','),
+            count(distinct field1),
+            group_concat(cast(field2 as varchar), ','),
+            count(distinct field2)
+        from agg_4_phase_tbl2
+        group by id
+        order by id"""
+
+    qt_phase4_single_distinct """
+        select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+            id,
+            group_concat(cast(field1 as varchar), ','),
+            count(distinct field1)
+        from agg_4_phase_tbl2
+        group by id
+        order by id"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to