This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new bb383f1620e branch-4.0: [fix](distinct)fix distinct info is lost when
union's children are distinct #60085 (#60176)
bb383f1620e is described below
commit bb383f1620e0cd2a5d7d382975916813b97eddd7
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Jan 24 12:30:46 2026 +0800
branch-4.0: [fix](distinct)fix distinct info is lost when union's children
are distinct #60085 (#60176)
Cherry-picked from #60085
Co-authored-by: starocean999 <[email protected]>
---
.../nereids/rules/analysis/BindExpression.java | 7 +++--
.../rules/expression/ExpressionRewrite.java | 30 +++++++++++++++++++---
.../project_distinct_to_agg.out | 4 +++
...ush_project_into_union_with_unique_function.out | 2 +-
.../project_distinct_to_agg.groovy | 17 ++++++++++++
5 files changed, 52 insertions(+), 8 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 852d13c1412..9409261fc52 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -91,7 +91,6 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias;
import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalUsingJoin;
-import org.apache.doris.nereids.trees.plans.logical.ProjectProcessor;
import org.apache.doris.nereids.trees.plans.visitor.InferPlanOutputAlias;
import org.apache.doris.nereids.types.BooleanType;
import org.apache.doris.nereids.types.StructField;
@@ -353,9 +352,9 @@ public class BindExpression implements AnalysisRuleFactory {
if
(childrenProjections.get(i).stream().allMatch(SlotReference.class::isInstance))
{
newChild = child;
} else {
- List<NamedExpression> parentProject =
childrenProjections.get(i);
- newChild = ProjectProcessor.tryProcessProject(parentProject,
child)
- .orElseGet(() -> new LogicalProject<>(parentProject,
child));
+ // projects can only be merged if it's not distinct
+ // so we should merge projects after
ProjectWithDistinctToAggregate
+ newChild = new LogicalProject<>(childrenProjections.get(i),
child);
}
newChildren.add(newChild);
childrenOutputs.add((List<SlotReference>) (List)
newChild.getOutput());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
index f48c3ec9750..e41ce5c3992 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
@@ -50,6 +50,7 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation;
import org.apache.doris.nereids.trees.plans.logical.LogicalSink;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.Utils;
@@ -368,10 +369,33 @@ public class ExpressionRewrite implements
RewriteRuleFactory {
changed |= result.changed;
newSlotsList.add(result.result);
}
- if (!changed) {
- return setOperation;
+ if (setOperation instanceof LogicalUnion) {
+ LogicalUnion logicalUnion = (LogicalUnion) setOperation;
+ List<List<NamedExpression>> constantExprsList =
logicalUnion.getConstantExprsList();
+ ImmutableList.Builder<List<NamedExpression>>
newConstantListBuilder = ImmutableList.builder();
+ for (List<NamedExpression> oneRowProject :
constantExprsList) {
+ Builder<NamedExpression> rewrittenExprs = ImmutableList
+ .builderWithExpectedSize(oneRowProject.size());
+ for (NamedExpression project : oneRowProject) {
+ NamedExpression newProject = (NamedExpression)
rewriter.rewrite(project, context);
+ if (!changed && !project.deepEquals(newProject)) {
+ changed = true;
+ }
+ rewrittenExprs.add(newProject);
+ }
+ newConstantListBuilder.add(rewrittenExprs.build());
+ }
+ if (!changed) {
+ return setOperation;
+ }
+ return
logicalUnion.withChildrenAndConstExprsList(setOperation.children(),
newSlotsList,
+ newConstantListBuilder.build());
+ } else {
+ if (!changed) {
+ return setOperation;
+ }
+ return
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
}
- return
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
})
.toRule(RuleType.REWRITE_SET_OPERATION_EXPRESSION);
}
diff --git
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
index 3b94bae1305..f6e0f7b6cca 100644
---
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
+++
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
@@ -51,3 +51,7 @@ PhysicalResultSink
-- !agg_result --
7
+-- !select --
+1 1
+1 2
+
diff --git
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
index d1844e88c74..7af0a74a43a 100644
---
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
+++
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
@@ -1,7 +1,7 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !push_down_1 --
PhysicalResultSink
---PhysicalUnion(constantExprsList=[[(100.0 + random()) AS `b`, (100.0 +
random()) AS `c`], [(200.0 + random()) AS `b`, (200.0 + random()) AS `c`]])
+--PhysicalUnion(constantExprsList=[[(random() + 100.0) AS `b`, (random() +
100.0) AS `c`], [(random() + 200.0) AS `b`, (random() + 200.0) AS `c`]])
-- !push_down_2 --
PhysicalResultSink
diff --git
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
index f6afb9539a9..c0d12ddcd2c 100644
---
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
+++
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
@@ -29,4 +29,21 @@ suite('project_distinct_to_agg') {
explainAndOrderResult 'agg', "select distinct sum(a) from ${tbl}"
sql "drop table if exists ${tbl} force"
+
+ qt_select '''
+ (
+ SELECT
+ DISTINCT t_alias.u_col, cast(1 as bigint)
+ FROM (select [1,1,1] k1) as t
+ lateral view explode(k1) t_alias as u_col
+ )
+ UNION ALL (
+ SELECT
+ DISTINCT 1, t_alias.u_col
+ FROM (select [2,2,2] k1) as t
+ lateral view explode(k1) t_alias as u_col
+ )
+ ORDER BY
+ 1, 2;
+ '''
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]