This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 17b11086357 [fix](nereids)support uncorrelated subquery in join 
condition (#26672)
17b11086357 is described below

commit 17b110863578a805a73205093b745f030c588fbf
Author: starocean999 <40539150+starocean...@users.noreply.github.com>
AuthorDate: Mon Nov 13 11:49:11 2023 +0800

    [fix](nereids)support uncorrelated subquery in join condition (#26672)
    
    sql select * from t1 a join t1 b on b.id in (select 1) and a.id = b.id; 
will report an error.
    This pr support uncorrelated subquery in join condition to fix it
---
 .../nereids/rules/analysis/SubqueryToApply.java    | 123 ++++++++++++++++-----
 .../nereids_syntax_p0/sub_query_correlated.out     |   3 +
 .../nereids_p0/subquery/test_subquery.groovy       |  56 ++++++++++
 .../nereids_syntax_p0/sub_query_correlated.groovy  |  28 +++++
 .../suites/query_p0/join/test_join.groovy          |   2 +-
 5 files changed, 183 insertions(+), 29 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 4c6a8fdb8ad..2852db11089 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.BinaryOperator;
-import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
 import org.apache.doris.nereids.trees.expressions.Exists;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.InSubquery;
@@ -45,6 +44,7 @@ import 
org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
 import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
@@ -180,7 +180,13 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
                                     Collectors.toList()));
                     List<Expression> subqueryConjuncts = 
joinConjuncts.get(true);
                     if (subqueryConjuncts == null || subqueryConjuncts.stream()
-                            .anyMatch(expr -> !isValidSubqueryConjunct(expr, 
join.left()))) {
+                            .anyMatch(expr -> !isValidSubqueryConjunct(expr))) 
{
+                        return join;
+                    }
+
+                    List<RelatedInfo> relatedInfoList = collectRelatedInfo(
+                            subqueryConjuncts, join.left(), join.right());
+                    if (relatedInfoList.stream().anyMatch(info -> info == 
RelatedInfo.UnSupported)) {
                         return join;
                     }
 
@@ -188,8 +194,9 @@ public class SubqueryToApply implements AnalysisRuleFactory 
{
                             .<Set<SubqueryExpr>>map(e -> 
e.collect(SubqueryExpr.class::isInstance))
                             .collect(ImmutableList.toImmutableList());
                     ImmutableList.Builder<Expression> newConjuncts = new 
ImmutableList.Builder<>();
-                    LogicalPlan applyPlan = null;
+                    LogicalPlan applyPlan;
                     LogicalPlan leftChildPlan = (LogicalPlan) join.left();
+                    LogicalPlan rightChildPlan = (LogicalPlan) join.right();
 
                     // Subquery traversal with the conjunct of and as the 
granularity.
                     for (int i = 0; i < subqueryExprsList.size(); ++i) {
@@ -207,9 +214,14 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
 
                         applyPlan = subqueryToApply(
                                 
subqueryExprs.stream().collect(ImmutableList.toImmutableList()),
-                                leftChildPlan, 
context.getSubqueryToMarkJoinSlot(),
+                                relatedInfoList.get(i) == 
RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan,
+                                context.getSubqueryToMarkJoinSlot(),
                                 ctx.cascadesContext, Optional.of(conjunct), 
false);
-                        leftChildPlan = applyPlan;
+                        if (relatedInfoList.get(i) == 
RelatedInfo.RelatedToLeft) {
+                            leftChildPlan = applyPlan;
+                        } else {
+                            rightChildPlan = applyPlan;
+                        }
                         newConjuncts.add(conjunct);
                     }
                     List<Expression> simpleConjuncts = 
joinConjuncts.get(false);
@@ -217,34 +229,82 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
                         newConjuncts.addAll(simpleConjuncts);
                     }
                     Plan newJoin = 
join.withConjunctsChildren(join.getHashJoinConjuncts(),
-                            newConjuncts.build(), applyPlan, join.right());
+                            newConjuncts.build(), leftChildPlan, 
rightChildPlan);
                     return newJoin;
                 }))
         );
     }
 
-    private static boolean isValidSubqueryConjunct(Expression expression, Plan 
leftChild) {
-        // the subquery must be uncorrelated subquery or only correlated to 
the left child
-        // currently only support the following 4 simple scenarios
-        // 1. col ComparisonPredicate subquery
-        // 2. col in (subquery)
-        // 3. exists (subquery)
-        // 4. col1 ComparisonPredicate subquery or xxx (no more subquery)
-        List<Slot> slots = leftChild.getOutput();
-        if (expression instanceof ComparisonPredicate && expression.child(1) 
instanceof ScalarSubquery) {
-            ScalarSubquery subquery = (ScalarSubquery) expression.child(1);
-            return slots.containsAll(subquery.getCorrelateSlots());
-        } else if (expression instanceof InSubquery) {
-            return slots.containsAll(((InSubquery) 
expression).getCorrelateSlots());
-        } else if (expression instanceof Exists) {
-            return slots.containsAll(((Exists) 
expression).getCorrelateSlots());
-        } else {
+    private static boolean isValidSubqueryConjunct(Expression expression) {
+        // only support 1 subquery expr in the expression
+        // don't support expression like subquery1 or subquery2
+        return expression.collectToList(SubqueryExpr.class::isInstance).size() 
== 1;
+    }
+
+    private enum RelatedInfo {
+        // both subquery and its output don't related to any child. like 
(select sum(t.a) from t) > 1
+        Unrelated,
+        // either subquery or its output only related to left child. like 
bellow:
+        // tableLeft.a in (select t.a from t)
+        // 3 in (select t.b from t where t.a = tableLeft.a)
+        // tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b)
+        RelatedToLeft,
+        // like above, but related to right child
+        RelatedToRight,
+        // subquery related to both left and child is not supported:
+        // tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b)
+        UnSupported
+    }
+
+    private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression> 
subqueryConjuncts,
+            Plan leftChild, Plan rightChild) {
+        int size = subqueryConjuncts.size();
+        ImmutableList.Builder<RelatedInfo> correlatedInfoList = new 
ImmutableList.Builder<>();
+        Set<Slot> leftOutputSlots = leftChild.getOutputSet();
+        Set<Slot> rightOutputSlots = rightChild.getOutputSet();
+        for (int i = 0; i < size; ++i) {
+            Expression expression = subqueryConjuncts.get(i);
             List<SubqueryExpr> subqueryExprs = 
expression.collectToList(SubqueryExpr.class::isInstance);
+            RelatedInfo relatedInfo = RelatedInfo.UnSupported;
             if (subqueryExprs.size() == 1) {
-                return 
slots.containsAll(subqueryExprs.get(0).getCorrelateSlots());
+                SubqueryExpr subqueryExpr = subqueryExprs.get(0);
+                List<Slot> correlatedSlots = subqueryExpr.getCorrelateSlots();
+                if (subqueryExpr instanceof ScalarSubquery) {
+                    Set<Slot> inputSlots = expression.getInputSlots();
+                    if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) {
+                        relatedInfo = RelatedInfo.Unrelated;
+                    } else if (leftOutputSlots.containsAll(inputSlots)
+                            && leftOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToLeft;
+                    } else if (rightOutputSlots.containsAll(inputSlots)
+                            && rightOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToRight;
+                    }
+                } else if (subqueryExpr instanceof InSubquery) {
+                    InSubquery inSubquery = (InSubquery) subqueryExpr;
+                    Set<Slot> compareSlots = 
inSubquery.getCompareExpr().getInputSlots();
+                    if (compareSlots.isEmpty()) {
+                        relatedInfo = RelatedInfo.UnSupported;
+                    } else if (leftOutputSlots.containsAll(compareSlots)
+                            && leftOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToLeft;
+                    } else if (rightOutputSlots.containsAll(compareSlots)
+                            && rightOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToRight;
+                    }
+                } else if (subqueryExpr instanceof Exists) {
+                    if (correlatedSlots.isEmpty()) {
+                        relatedInfo = RelatedInfo.Unrelated;
+                    } else if (leftOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToLeft;
+                    } else if (rightOutputSlots.containsAll(correlatedSlots)) {
+                        relatedInfo = RelatedInfo.RelatedToRight;
+                    }
+                }
             }
+            correlatedInfoList.add(relatedInfo);
         }
-        return false;
+        return correlatedInfoList.build();
     }
 
     private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, 
LogicalPlan childPlan,
@@ -270,10 +330,17 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
     private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
                  Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot) {
         return exists instanceof Exists
-                && exists.getQueryPlan()
-                        .anyMatch(planTreeNode -> planTreeNode instanceof 
LogicalAggregate
-                                && ((LogicalAggregate<?>) 
planTreeNode).getGroupByExpressions().isEmpty())
-                && !subqueryToMarkJoinSlot.get(exists).isPresent();
+                && !subqueryToMarkJoinSlot.get(exists).isPresent()
+                && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
+    }
+
+    private boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
+        if (plan instanceof LogicalAggregate) {
+            return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
+        } else if (plan instanceof LogicalProject || plan instanceof 
LogicalSort) {
+            return hasTopLevelAggWithoutGroupBy(plan.child(0));
+        }
+        return false;
     }
 
     private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out 
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index c8051946673..0564d1884d9 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -486,3 +486,6 @@ true
 -- !cir_5218_exists_ok_6 --
 0
 
+-- !doris_7643 --
+3      3
+
diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy 
b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
index 1c2dd55676d..01e347031f8 100644
--- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
+++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
@@ -65,6 +65,62 @@ suite("test_subquery") {
             select * from nereids_test_query_db.baseall where k1 = (select k1 
from nereids_test_query_db.baseall order by k1 desc limit 1)
         """
 
+    sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
+    sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
+    sql """CREATE TABLE `table_1000_undef_undef` (
+            `pk` int(11) NULL,
+            `col_bigint_undef_signed` bigint(20) NULL,
+            `col_bigint_undef_signed2` bigint(20) NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, 
`col_bigint_undef_signed2`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "is_being_synced" = "false",
+            "storage_format" = "V2",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false"
+            );  """
+
+    sql """ CREATE TABLE `table_1000_undef_undef2` (
+            `pk` int(11) NULL,
+            `col_bigint_undef_signed` bigint(20) NULL,
+            `col_bigint_undef_signed2` bigint(20) NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, 
`col_bigint_undef_signed2`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "is_being_synced" = "false",
+            "storage_format" = "V2",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false"
+            );"""
+    explain {
+        sql """
+            SELECT `col_bigint_undef_signed` '00:39:36' , 
`col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37', 
`col_bigint_undef_signed2` '19:01:23'
+                FROM table_1000_undef_undef2
+                WHERE EXISTS 
+                    (SELECT `col_bigint_undef_signed` '17:38:13' , 
`col_bigint_undef_signed2` '17:36:21'
+                    FROM table_1000_undef_undef2
+                    WHERE `col_bigint_undef_signed2` NOT IN 
+                        (SELECT `col_bigint_undef_signed`
+                        FROM table_1000_undef_undef2
+                        WHERE `col_bigint_undef_signed2` < 
+                            (SELECT AVG(`col_bigint_undef_signed`)
+                            FROM table_1000_undef_undef2
+                            WHERE `col_bigint_undef_signed2` < 2)) ) ; 
+        """
+        contains("VAGGREGATE")
+    }
+
+    sql """DROP TABLE IF EXISTS table_1000_undef_undef"""
+    sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""
+
     sql """drop table if exists test_one_row_relation;"""
     sql """
         CREATE TABLE `test_one_row_relation` (
diff --git 
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy 
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index f64cba11097..0e98510e96f 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -648,6 +648,34 @@ suite ("sub_query_correlated") {
         exception "Unsupported correlated subquery with grouping and/or 
aggregation";
     }
 
+    qt_doris_7643 """
+        SELECT sub_query_correlated_subquery6.*
+        FROM sub_query_correlated_subquery6
+        JOIN sub_query_correlated_subquery7
+            ON sub_query_correlated_subquery6.k2 = 
sub_query_correlated_subquery7.k3
+                AND EXISTS 
+            (SELECT sub_query_correlated_subquery8.k1
+            FROM sub_query_correlated_subquery8 )
+                AND sub_query_correlated_subquery6.k2 IN 
+            (SELECT sub_query_correlated_subquery8.k2
+            FROM sub_query_correlated_subquery8 )
+                AND sub_query_correlated_subquery6.k1 IN 
+            (SELECT sub_query_correlated_subquery8.k2
+            FROM sub_query_correlated_subquery8
+            WHERE sub_query_correlated_subquery6.k2 = 
sub_query_correlated_subquery8.k2 )
+                AND sub_query_correlated_subquery7.k3 IN 
+            (SELECT sub_query_correlated_subquery8.k1
+            FROM sub_query_correlated_subquery8 )
+                AND 10 > 
+            (SELECT min(sub_query_correlated_subquery8.k2)
+            FROM sub_query_correlated_subquery8 )
+                AND sub_query_correlated_subquery7.k3 IN 
+            (SELECT sub_query_correlated_subquery8.k2
+            FROM sub_query_correlated_subquery8
+            WHERE sub_query_correlated_subquery7.v1 = 
sub_query_correlated_subquery8.k2 )
+        ORDER BY  sub_query_correlated_subquery6.k1, 
sub_query_correlated_subquery6.k2; 
+        """
+
     // order_qt_doris_6937_2 """
     //     select * from sub_query_correlated_subquery1 where 
sub_query_correlated_subquery1.k1 not in (select 
sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where 
sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 
10 order by k1, k2;
     // """
diff --git a/regression-test/suites/query_p0/join/test_join.groovy 
b/regression-test/suites/query_p0/join/test_join.groovy
index e6d2906c0d3..15b010fdfae 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") {
     qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2 
LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS 
NULL) order by 1, 2, 3, 4"""
 
     test {
-        sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and 
a.k1 = b.k1 order by k1;"
+        sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from 
baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;"
         exception "Not support OnClause contain Subquery"
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to