This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 80ea98b371a [fix](nereids)subquery unnesting get wrong result if 
correlated conjuncts is not slot_a = slot_b (#37683)
80ea98b371a is described below

commit 80ea98b371a5aa55efa049f501ca305ba136f7a7
Author: starocean999 <40539150+starocean...@users.noreply.github.com>
AuthorDate: Tue Jul 16 15:06:40 2024 +0800

    [fix](nereids)subquery unnesting get wrong result if correlated conjuncts 
is not slot_a = slot_b (#37683)
    
    pick from master https://github.com/apache/doris/pull/37644
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/jobs/executor/Rewriter.java      |  6 +-
 .../nereids/rules/analysis/SubExprAnalyzer.java    | 10 +++
 .../rewrite/UnCorrelatedApplyAggregateFilter.java  | 18 +++++-
 .../trees/plans/logical/LogicalProject.java        |  4 ++
 .../java/org/apache/doris/nereids/util/Utils.java  | 56 ++++++++++++++---
 .../nereids_syntax_p0/test_subquery_conjunct.out   | 54 ++++++++++++++++
 .../sub_query_diff_old_optimize.groovy             |  2 +-
 .../test_subquery_conjunct.groovy                  | 71 ++++++++++++++++++++++
 8 files changed, 206 insertions(+), 15 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index a13e784c7a8..9df26fce31f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -200,7 +200,11 @@ public class Rewriter extends AbstractBatchJobExecutor {
                                  *  TODO: group these rules to make sure the 
result plan is what we expected.
                                  */
                                 new CorrelateApplyToUnCorrelateApply(),
-                                new ApplyToJoin()
+                                new ApplyToJoin(),
+                                // UnCorrelatedApplyAggregateFilter rule will 
create new aggregate outputs,
+                                // The later rule CheckPrivileges which 
inherent from ColumnPruning only works
+                                // if the aggregation node is normalized, so 
we need call NormalizeAggregate here
+                                new NormalizeAggregate()
                         )
                 ),
                 // before `Subquery unnesting` topic, some correlate slots 
should have appeared at LogicalApply.left,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
index d52fc96ff91..7bfd5256f6a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
@@ -75,6 +75,11 @@ class SubExprAnalyzer<T> extends 
DefaultExpressionRewriter<T> {
 
     @Override
     public Expression visitExistsSubquery(Exists exists, T context) {
+        LogicalPlan queryPlan = exists.getQueryPlan();
+        // distinct is useless, remove it
+        if (queryPlan instanceof LogicalProject && ((LogicalProject) 
queryPlan).isDistinct()) {
+            exists = exists.withSubquery(((LogicalProject) 
queryPlan).withDistinct(false));
+        }
         AnalyzedResult analyzedResult = analyzeSubquery(exists);
         if (analyzedResult.rootIsLimitZero()) {
             return BooleanLiteral.of(exists.isNot());
@@ -89,6 +94,11 @@ class SubExprAnalyzer<T> extends 
DefaultExpressionRewriter<T> {
 
     @Override
     public Expression visitInSubquery(InSubquery expr, T context) {
+        LogicalPlan queryPlan = expr.getQueryPlan();
+        // distinct is useless, remove it
+        if (queryPlan instanceof LogicalProject && ((LogicalProject) 
queryPlan).isDistinct()) {
+            expr = expr.withSubquery(((LogicalProject) 
queryPlan).withDistinct(false));
+        }
         AnalyzedResult analyzedResult = analyzeSubquery(expr);
 
         checkOutputColumn(analyzedResult.getLogicalPlan());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
index 211e76710c5..f4acd847652 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnCorrelatedApplyAggregateFilter.java
@@ -19,8 +19,10 @@ package org.apache.doris.nereids.rules.rewrite;
 
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 import org.apache.doris.nereids.trees.plans.logical.LogicalApply;
@@ -31,6 +33,7 @@ import org.apache.doris.nereids.util.Utils;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -97,10 +100,19 @@ public class UnCorrelatedApplyAggregateFilter implements 
RewriteRuleFactory {
         // pull up correlated filter into apply node
         List<NamedExpression> newAggOutput = new 
ArrayList<>(agg.getOutputExpressions());
         List<Expression> newGroupby =
-                Utils.getCorrelatedSlots(correlatedPredicate, 
apply.getCorrelationSlot());
+                Utils.getUnCorrelatedExprs(correlatedPredicate, 
apply.getCorrelationSlot());
         newGroupby.addAll(agg.getGroupByExpressions());
-        
newAggOutput.addAll(newGroupby.stream().map(NamedExpression.class::cast)
-                .collect(ImmutableList.toImmutableList()));
+        Map<Expression, Slot> unCorrelatedExprToSlot = Maps.newHashMap();
+        for (Expression expression : newGroupby) {
+            if (expression instanceof Slot) {
+                newAggOutput.add((NamedExpression) expression);
+            } else {
+                Alias alias = new Alias(expression);
+                unCorrelatedExprToSlot.put(expression, alias.toSlot());
+                newAggOutput.add(alias);
+            }
+        }
+        correlatedPredicate = ExpressionUtils.replace(correlatedPredicate, 
unCorrelatedExprToSlot);
         LogicalAggregate newAgg = new LogicalAggregate<>(newGroupby, 
newAggOutput,
                 
PlanUtils.filterOrSelf(ImmutableSet.copyOf(unCorrelatedPredicate), 
filter.child()));
         return new LogicalApply<>(apply.getCorrelationSlot(), 
apply.getSubqueryExpr(),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
index 24d938636aa..4174d625936 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
@@ -199,6 +199,10 @@ public class LogicalProject<CHILD_TYPE extends Plan> 
extends LogicalUnary<CHILD_
         return new LogicalProject<>(projects, excepts, isDistinct, 
canEliminate, ImmutableList.of(child));
     }
 
+    public LogicalProject<Plan> withDistinct(boolean isDistinct) {
+        return new LogicalProject<>(projects, excepts, isDistinct, 
canEliminate, children);
+    }
+
     public boolean isDistinct() {
         return isDistinct;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
index 852e148ef1d..0928833c105 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
@@ -17,8 +17,11 @@
 
 package org.apache.doris.nereids.util;
 
+import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 
@@ -178,18 +181,51 @@ public class Utils {
     }
 
     /**
-     * Get the correlated columns that belong to the subquery,
-     * that is, the correlated columns that can be resolved within the 
subquery.
+     * Get the unCorrelated exprs that belong to the subquery,
+     * that is, the unCorrelated exprs that can be resolved within the 
subquery.
      * eg:
-     * select * from t1 where t1.a = (select sum(t2.b) from t2 where t1.c = 
t2.d));
-     * correlatedPredicates : t1.c = t2.d
-     * correlatedSlots : t1.c
-     * return t2.d
+     * select * from t1 where t1.a = (select sum(t2.b) from t2 where t1.c = 
abs(t2.d));
+     * correlatedPredicates : t1.c = abs(t2.d)
+     * unCorrelatedExprs : abs(t2.d)
+     * return abs(t2.d)
      */
-    public static List<Expression> getCorrelatedSlots(List<Expression> 
correlatedPredicates,
-            List<Expression> correlatedSlots) {
-        return ExpressionUtils.getInputSlotSet(correlatedPredicates).stream()
-                .filter(slot -> 
!correlatedSlots.contains(slot)).collect(Collectors.toList());
+    public static List<Expression> getUnCorrelatedExprs(List<Expression> 
correlatedPredicates,
+                                                        List<Expression> 
correlatedSlots) {
+        List<Expression> unCorrelatedExprs = new ArrayList<>();
+        correlatedPredicates.forEach(predicate -> {
+            if (!(predicate instanceof BinaryExpression) && (!(predicate 
instanceof Not)
+                    || !(predicate.child(0) instanceof BinaryExpression))) {
+                throw new AnalysisException(
+                        "Unsupported correlated subquery with correlated 
predicate "
+                                + predicate.toString());
+            }
+
+            BinaryExpression binaryExpression;
+            if (predicate instanceof Not) {
+                binaryExpression = (BinaryExpression) ((Not) 
predicate).child();
+            } else {
+                binaryExpression = (BinaryExpression) predicate;
+            }
+            Expression left = binaryExpression.left();
+            Expression right = binaryExpression.right();
+            Set<Slot> leftInputSlots = left.getInputSlots();
+            Set<Slot> rightInputSlots = right.getInputSlots();
+            boolean correlatedToLeft = !leftInputSlots.isEmpty()
+                    && 
leftInputSlots.stream().allMatch(correlatedSlots::contains)
+                    && 
rightInputSlots.stream().noneMatch(correlatedSlots::contains);
+            boolean correlatedToRight = !rightInputSlots.isEmpty()
+                    && 
rightInputSlots.stream().allMatch(correlatedSlots::contains)
+                    && 
leftInputSlots.stream().noneMatch(correlatedSlots::contains);
+            if (!correlatedToLeft && !correlatedToRight) {
+                throw new AnalysisException(
+                        "Unsupported correlated subquery with correlated 
predicate " + predicate);
+            } else if (correlatedToLeft && !rightInputSlots.isEmpty()) {
+                unCorrelatedExprs.add(right);
+            } else if (correlatedToRight && !leftInputSlots.isEmpty()) {
+                unCorrelatedExprs.add(left);
+            }
+        });
+        return unCorrelatedExprs;
     }
 
     private static List<Expression> collectCorrelatedSlotsFromChildren(
diff --git a/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out 
b/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out
new file mode 100644
index 00000000000..bd1ac12b8ea
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/test_subquery_conjunct.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_simple_scalar --
+-2     -2
+2      2
+3      2
+
+-- !select_complex_scalar --
+2      2
+3      2
+
+-- !select_simple_in --
+1      1
+2      1
+
+-- !select_complex_in --
+1      1
+2      1
+
+-- !select_simple_not_in --
+-2     -2
+-1     -1
+1      1
+2      1
+2      2
+3      2
+
+-- !select_complex_not_in --
+-2     -2
+-1     -1
+1      1
+2      1
+2      2
+3      2
+
+-- !select_simple_exists --
+-2     -2
+2      2
+3      2
+
+-- !select_complex_exists --
+2      2
+3      2
+
+-- !select_simple_not_exists --
+-1     -1
+1      1
+2      1
+
+-- !select_complex_not_exists --
+-2     -2
+-1     -1
+1      1
+2      1
+
diff --git 
a/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy 
b/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
index 248b89a01ef..3c6b013a275 100644
--- 
a/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
+++ 
b/regression-test/suites/nereids_syntax_p0/sub_query_diff_old_optimize.groovy
@@ -193,7 +193,7 @@ suite ("sub_query_diff_old_optimize") {
         sql """
             SELECT DISTINCT k1 FROM sub_query_diff_old_optimize_subquery1 i1 
WHERE ((SELECT count(*) FROM sub_query_diff_old_optimize_subquery1 WHERE ((k1 = 
i1.k1) AND (k2 = 2)) or ((k2 = i1.k1) AND (k2 = 1)) )  > 0);
         """
-        exception "scalar subquery's correlatedPredicates's operator must be 
EQ"
+        exception "Unsupported correlated subquery with correlated predicate"
 
     }
 }
diff --git 
a/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy 
b/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy
new file mode 100644
index 00000000000..43b47fc2ca4
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/test_subquery_conjunct.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_subquery_conjunct") {
+    sql "set enable_nereids_planner=true"
+    sql "set enable_fallback_to_original_planner=false"
+    sql """drop table if exists subquery_conjunct_table;"""
+    sql """CREATE TABLE `subquery_conjunct_table` (
+            `id` INT NOT NULL,
+            `c1` INT NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`, `c1`)
+            DISTRIBUTED BY RANDOM BUCKETS AUTO
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );"""
+
+    sql """insert into subquery_conjunct_table values(1, 
1),(2,2),(-1,-1),(-2,-2),(2,1),(3,2);"""
+    qt_select_simple_scalar """select * from subquery_conjunct_table t1 where 
abs(t1.c1) != (select sum(c1) from subquery_conjunct_table t2 where t2.c1 + 
t2.id = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_complex_scalar """select * from subquery_conjunct_table t1 where 
abs(t1.c1) != (select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 + 
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_simple_in """select * from subquery_conjunct_table t1 where 
abs(t1.c1) in (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id -1 
 = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_complex_in """select * from subquery_conjunct_table t1 where 
abs(t1.c1) in (select c1 from subquery_conjunct_table t2 where abs(t2.c1+ t2.id 
-1) = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_simple_not_in """select * from subquery_conjunct_table t1 where 
abs(t1.c1) not in (select c1 from subquery_conjunct_table t2 where t2.c1 + 
t2.id = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_complex_not_in """select * from subquery_conjunct_table t1 where 
abs(t1.c1) not in (select c1 from subquery_conjunct_table t2 where abs(t2.c1 + 
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+    qt_select_simple_exists """select * from subquery_conjunct_table t1 where 
exists (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id = t1.c1) 
order by t1.id, t1.c1;"""
+    qt_select_complex_exists """select * from subquery_conjunct_table t1 where 
exists (select c1 from subquery_conjunct_table t2 where abs(t2.c1 + t2.id) = 
t1.c1) order by t1.id, t1.c1;"""
+    qt_select_simple_not_exists """select * from subquery_conjunct_table t1 
where not exists (select c1 from subquery_conjunct_table t2 where t2.c1 + t2.id 
= t1.c1) order by t1.id, t1.c1;"""
+    qt_select_complex_not_exists """select * from subquery_conjunct_table t1 
where not exists (select c1 from subquery_conjunct_table t2 where abs(t2.c1 + 
t2.id) = t1.c1) order by t1.id, t1.c1;"""
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) != 
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) - t1.c1 = 0) 
order by t1.id; """
+        exception "Unsupported correlated subquery with correlated predicate"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) != ( 
select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 -1) + t1.id = 
t1.c1) order by t1.id, t1.c1; """
+        exception "Unsupported correlated subquery with correlated predicate"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) != 
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) > t1.c1) order 
by t1.id; """
+        exception "scalar subquery's correlatedPredicates's operator must be 
EQ"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) in 
(select sum(c1) from subquery_conjunct_table t2 where t2.c1 + 1 = t1.c1) order 
by t1.id, t1.c1; """
+        exception "Unsupported correlated subquery with grouping and/or 
aggregation"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) in 
(select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1) = t1.c1) order 
by t1.id, t1.c1; """
+        exception "Unsupported correlated subquery with grouping and/or 
aggregation"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) not 
in (select sum(c1) from subquery_conjunct_table t2 where t2.c1 + 1= t1.c1) 
order by t1.id, t1.c1; """
+        exception "Unsupported correlated subquery with grouping and/or 
aggregation"
+    }
+    test {
+        sql """ select * from subquery_conjunct_table t1 where abs(t1.c1) not 
in (select sum(c1) from subquery_conjunct_table t2 where abs(t2.c1 -1) = t1.c1) 
order by t1.id, t1.c1; """
+        exception "Unsupported correlated subquery with grouping and/or 
aggregation"
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to