This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2950631f075 [fix](Nereids) fix four phase aggregation compute wrong 
result (#36128)
2950631f075 is described below

commit 2950631f0750e8134415d07373c299aa2ae68f30
Author: 924060929 <924060...@qq.com>
AuthorDate: Tue Jun 11 20:38:20 2024 +0800

    [fix](Nereids) fix four phase aggregation compute wrong result (#36128)
    
    
    fix a bug introduced by #35871
    
    Backend can process distinct aggregate by two method:
    1. use group by key to process distinct, I call it: external distinct of 
aggregate function
    2. use `multi_distinct_xxx` to process distinct, in the corresponding 
aggregate function, I call it: internal distinct of aggregate function
    
    
    This sql should process distinct by the multi distinct function when use 
four phase aggregation:
    The first two phase use group by key: id,  field1 and field2.
    The last two phase use group by key: id. This group by key can not process 
distinct in the aggregate function, so we should use internal distinct method 
in four phase aggregate function, if contains multi distinct column(e.g. field1 
and field2)
    
    ```sql
    select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
      id,
      count(distinct field1),
      count(distinct field2)
    from agg_4_phase_tbl2
    group by id
    ```
---
 .../rules/implementation/AggregateStrategies.java  | 46 ++++++++++++++++++----
 .../data/nereids_syntax_p0/agg_4_phase.out         | 12 +++++-
 .../suites/nereids_syntax_p0/agg_4_phase.groovy    | 27 ++++++++++++-
 3 files changed, 75 insertions(+), 10 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index eae17b1a99e..d15de080913 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -370,6 +370,12 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                             .addAll(agg.getDistinctArguments())
                             .build().size() > 
agg.getGroupByExpressions().size()
                     )
+                    .when(agg -> {
+                        if (agg.getDistinctArguments().size() == 1) {
+                            return true;
+                        }
+                        return couldConvertToMulti(agg);
+                    })
                     .thenApplyMulti(ctx -> {
                         Function<List<Expression>, RequireProperties> 
secondPhaseRequireGroupByAndDistinctHash =
                                 groupByAndDistinct -> RequireProperties.of(
@@ -1849,6 +1855,8 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                 bufferToBufferParam, false, logicalAgg.getLogicalProperties(),
                 secondPhaseRequire, anyLocalAgg);
 
+        boolean shouldDistinctAfterPhase2 = distinctArguments.size() > 1;
+
         // phase 3
         AggregateParam distinctLocalParam = new AggregateParam(
                 AggPhase.DISTINCT_LOCAL, AggMode.INPUT_TO_BUFFER, couldBanned);
@@ -1867,11 +1875,25 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                                                 || 
aggregateFunction.getDistinctArguments().size() == 1,
                                         "cannot process more than one child in 
aggregate distinct function: "
                                                 + aggregateFunction);
-                                AggregateFunction nonDistinct = 
aggregateFunction
-                                        .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
-                                AggregateExpression nonDistinctAggExpr = new 
AggregateExpression(nonDistinct,
-                                        distinctLocalParam, aggregateFunction);
-                                return nonDistinctAggExpr;
+
+                                AggregateFunction newDistinct;
+                                if (shouldDistinctAfterPhase2) {
+                                    // we use aggregate function to process 
distinct,
+                                    // so need to change to multi distinct 
function
+                                    newDistinct = tryConvertToMultiDistinct(
+                                            
aggregateFunction.withDistinctAndChildren(
+                                                    true, 
ImmutableList.copyOf(aggChild))
+                                    );
+                                } else {
+                                    // we use group by to process distinct,
+                                    // so no distinct param in the aggregate 
function
+                                    newDistinct = 
aggregateFunction.withDistinctAndChildren(
+                                            false, 
ImmutableList.copyOf(aggChild));
+                                }
+
+                                AggregateExpression newDistinctAggExpr = new 
AggregateExpression(
+                                        newDistinct, distinctLocalParam, 
newDistinct);
+                                return newDistinctAggExpr;
                             } else {
                                 needUpdateSlot.add(aggregateFunction);
                                 Alias alias = 
nonDistinctAggFunctionToAliasPhase2.get(expr);
@@ -1908,11 +1930,19 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                                         || 
aggregateFunction.getDistinctArguments().size() == 1,
                                 "cannot process more than one child in 
aggregate distinct function: "
                                         + aggregateFunction);
-                        AggregateFunction nonDistinct = aggregateFunction
-                                .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
+                        AggregateFunction newDistinct;
+                        if (shouldDistinctAfterPhase2) {
+                            newDistinct = tryConvertToMultiDistinct(
+                                    aggregateFunction.withDistinctAndChildren(
+                                            true, 
ImmutableList.copyOf(aggChild))
+                            );
+                        } else {
+                            newDistinct = aggregateFunction
+                                    .withDistinctAndChildren(false, 
ImmutableList.copyOf(aggChild));
+                        }
                         int idx = 
logicalAgg.getOutputExpressions().indexOf(outputExpr);
                         Alias localDistinctAlias = (Alias) 
(localDistinctOutput.get(idx));
-                        return new AggregateExpression(nonDistinct,
+                        return new AggregateExpression(newDistinct,
                                 distinctGlobalParam, 
localDistinctAlias.toSlot());
                     } else {
                         Alias alias = 
nonDistinctAggFunctionToAliasPhase3.get(expr);
diff --git a/regression-test/data/nereids_syntax_p0/agg_4_phase.out 
b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
index 5c5dde6f855..a939738ea18 100644
--- a/regression-test/data/nereids_syntax_p0/agg_4_phase.out
+++ b/regression-test/data/nereids_syntax_p0/agg_4_phase.out
@@ -1,4 +1,14 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !4phase --
-3      160
+3
+
+-- !phase4_multi_distinct --
+1      -10,-10 1       a       1
+2      -4,-4   1       b       1
+3      -4      1       f       1
+
+-- !phase4_single_distinct --
+1      -10,-10 1
+2      -4,-4   1
+3      -4      1
 
diff --git a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy 
b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
index d3b418660ab..ed69d0018c9 100644
--- a/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
+++ b/regression-test/suites/nereids_syntax_p0/agg_4_phase.groovy
@@ -47,7 +47,7 @@ suite("agg_4_phase") {
             count(distinct id)
         from agg_4_phase_tbl;
         """
-    explain{
+    explain {
         sql(test_sql)
         contains ":VAGGREGATE (merge finalize)"
         contains ":VEXCHANGE"
@@ -60,4 +60,29 @@ suite("agg_4_phase") {
     sql """select GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl;"""
 
     sql """select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
 GROUP_CONCAT(distinct name, " ") from agg_4_phase_tbl group by gender;"""
+
+
+    sql "drop table if exists agg_4_phase_tbl2"
+    sql "create table agg_4_phase_tbl2(id int, field1 int, field2 
varchar(255)) properties('replication_num'='1');"
+    sql "insert into agg_4_phase_tbl2 values(1, -10, null), (1, -10, 'a'), (2, 
-4, null), (2, -4, 'b'), (3, -4, 'f');\n"
+
+    qt_phase4_multi_distinct """
+        select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+            id,
+            group_concat(cast(field1 as varchar), ','),
+            count(distinct field1),
+            group_concat(cast(field2 as varchar), ','),
+            count(distinct field2)
+        from agg_4_phase_tbl2
+        group by id
+        order by id"""
+
+    qt_phase4_single_distinct """
+        select 
/*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_WITH_MULTI_DISTINCT,THREE_PHASE_AGGREGATE_WITH_DISTINCT,THREE_PHASE_AGGREGATE_WITH_COUNT_DISTINCT_MULTI')*/
+            id,
+            group_concat(cast(field1 as varchar), ','),
+            count(distinct field1)
+        from agg_4_phase_tbl2
+        group by id
+        order by id"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to