This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new bb383f1620e branch-4.0: [fix](distinct)fix distinct info is lost when union's children are distinct #60085 (#60176)
bb383f1620e is described below

commit bb383f1620e0cd2a5d7d382975916813b97eddd7
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Jan 24 12:30:46 2026 +0800

    branch-4.0: [fix](distinct)fix distinct info is lost when union's children are distinct #60085 (#60176)
    
    Cherry-picked from #60085
    
    Co-authored-by: starocean999 <[email protected]>
---
 .../nereids/rules/analysis/BindExpression.java     |  7 +++--
 .../rules/expression/ExpressionRewrite.java        | 30 +++++++++++++++++++---
 .../project_distinct_to_agg.out                    |  4 +++
 ...ush_project_into_union_with_unique_function.out |  2 +-
 .../project_distinct_to_agg.groovy                 | 17 ++++++++++++
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 852d13c1412..9409261fc52 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -91,7 +91,6 @@ import 
org.apache.doris.nereids.trees.plans.logical.LogicalSort;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias;
 import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalUsingJoin;
-import org.apache.doris.nereids.trees.plans.logical.ProjectProcessor;
 import org.apache.doris.nereids.trees.plans.visitor.InferPlanOutputAlias;
 import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.types.StructField;
@@ -353,9 +352,9 @@ public class BindExpression implements AnalysisRuleFactory {
             if 
(childrenProjections.get(i).stream().allMatch(SlotReference.class::isInstance)) 
{
                 newChild = child;
             } else {
-                List<NamedExpression> parentProject = 
childrenProjections.get(i);
-                newChild = ProjectProcessor.tryProcessProject(parentProject, 
child)
-                        .orElseGet(() -> new LogicalProject<>(parentProject, 
child));
+                // projects can only be merged if they are not distinct,
+                // so we should merge projects after ProjectWithDistinctToAggregate
+                newChild = new LogicalProject<>(childrenProjections.get(i), 
child);
             }
             newChildren.add(newChild);
             childrenOutputs.add((List<SlotReference>) (List) 
newChild.getOutput());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
index f48c3ec9750..e41ce5c3992 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
@@ -50,6 +50,7 @@ import 
org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSink;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
 import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
 import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.Utils;
@@ -368,10 +369,33 @@ public class ExpressionRewrite implements 
RewriteRuleFactory {
                     changed |= result.changed;
                     newSlotsList.add(result.result);
                 }
-                if (!changed) {
-                    return setOperation;
+                if (setOperation instanceof LogicalUnion) {
+                    LogicalUnion logicalUnion = (LogicalUnion) setOperation;
+                    List<List<NamedExpression>> constantExprsList = 
logicalUnion.getConstantExprsList();
+                    ImmutableList.Builder<List<NamedExpression>> 
newConstantListBuilder = ImmutableList.builder();
+                    for (List<NamedExpression> oneRowProject : 
constantExprsList) {
+                        Builder<NamedExpression> rewrittenExprs = ImmutableList
+                                .builderWithExpectedSize(oneRowProject.size());
+                        for (NamedExpression project : oneRowProject) {
+                            NamedExpression newProject = (NamedExpression) 
rewriter.rewrite(project, context);
+                            if (!changed && !project.deepEquals(newProject)) {
+                                changed = true;
+                            }
+                            rewrittenExprs.add(newProject);
+                        }
+                        newConstantListBuilder.add(rewrittenExprs.build());
+                    }
+                    if (!changed) {
+                        return setOperation;
+                    }
+                    return 
logicalUnion.withChildrenAndConstExprsList(setOperation.children(), 
newSlotsList,
+                            newConstantListBuilder.build());
+                } else {
+                    if (!changed) {
+                        return setOperation;
+                    }
+                    return 
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
                 }
-                return 
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
             })
             .toRule(RuleType.REWRITE_SET_OPERATION_EXPRESSION);
         }
diff --git 
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
 
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
index 3b94bae1305..f6e0f7b6cca 100644
--- 
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
+++ 
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
@@ -51,3 +51,7 @@ PhysicalResultSink
 -- !agg_result --
 7
 
+-- !select --
+1      1
+1      2
+
diff --git 
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
 
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
index d1844e88c74..7af0a74a43a 100644
--- 
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
+++ 
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
@@ -1,7 +1,7 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !push_down_1 --
 PhysicalResultSink
---PhysicalUnion(constantExprsList=[[(100.0 + random()) AS `b`, (100.0 + 
random()) AS `c`], [(200.0 + random()) AS `b`, (200.0 + random()) AS `c`]])
+--PhysicalUnion(constantExprsList=[[(random() + 100.0) AS `b`, (random() + 
100.0) AS `c`], [(random() + 200.0) AS `b`, (random() + 200.0) AS `c`]])
 
 -- !push_down_2 --
 PhysicalResultSink
diff --git 
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
 
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
index f6afb9539a9..c0d12ddcd2c 100644
--- 
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
@@ -29,4 +29,21 @@ suite('project_distinct_to_agg') {
     explainAndOrderResult 'agg',  "select distinct sum(a) from ${tbl}"
 
     sql "drop table if exists ${tbl} force"
+
+    qt_select '''
+        (
+        SELECT
+            DISTINCT t_alias.u_col, cast(1 as bigint)
+        FROM (select [1,1,1] k1) as t
+            lateral view explode(k1) t_alias as u_col
+        )
+        UNION ALL (
+            SELECT
+                DISTINCT 1, t_alias.u_col
+            FROM (select [2,2,2] k1) as t
+                lateral view explode(k1) t_alias as u_col
+        )
+        ORDER BY
+            1, 2;
+    '''
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to