This is an automated email from the ASF dual-hosted git repository. starocean999 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 17b11086357 [fix](nereids)support uncorrelated subquery in join condition (#26672) 17b11086357 is described below commit 17b110863578a805a73205093b745f030c588fbf Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Mon Nov 13 11:49:11 2023 +0800 [fix](nereids)support uncorrelated subquery in join condition (#26672) sql select * from t1 a join t1 b on b.id in (select 1) and a.id = b.id; will report an error. This pr support uncorrelated subquery in join condition to fix it --- .../nereids/rules/analysis/SubqueryToApply.java | 123 ++++++++++++++++----- .../nereids_syntax_p0/sub_query_correlated.out | 3 + .../nereids_p0/subquery/test_subquery.groovy | 56 ++++++++++ .../nereids_syntax_p0/sub_query_correlated.groovy | 28 +++++ .../suites/query_p0/join/test_join.groovy | 2 +- 5 files changed, 183 insertions(+), 29 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java index 4c6a8fdb8ad..2852db11089 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.BinaryOperator; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.Exists; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.InSubquery; @@ -45,6 +44,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -180,7 +180,13 @@ public class SubqueryToApply implements AnalysisRuleFactory { Collectors.toList())); List<Expression> subqueryConjuncts = joinConjuncts.get(true); if (subqueryConjuncts == null || subqueryConjuncts.stream() - .anyMatch(expr -> !isValidSubqueryConjunct(expr, join.left()))) { + .anyMatch(expr -> !isValidSubqueryConjunct(expr))) { + return join; + } + + List<RelatedInfo> relatedInfoList = collectRelatedInfo( + subqueryConjuncts, join.left(), join.right()); + if (relatedInfoList.stream().anyMatch(info -> info == RelatedInfo.UnSupported)) { return join; } @@ -188,8 +194,9 @@ public class SubqueryToApply implements AnalysisRuleFactory { .<Set<SubqueryExpr>>map(e -> e.collect(SubqueryExpr.class::isInstance)) .collect(ImmutableList.toImmutableList()); ImmutableList.Builder<Expression> newConjuncts = new ImmutableList.Builder<>(); - LogicalPlan applyPlan = null; + LogicalPlan applyPlan; LogicalPlan leftChildPlan = (LogicalPlan) join.left(); + LogicalPlan rightChildPlan = (LogicalPlan) join.right(); // Subquery traversal with the conjunct of and as the granularity. for (int i = 0; i < subqueryExprsList.size(); ++i) { @@ -207,9 +214,14 @@ public class SubqueryToApply implements AnalysisRuleFactory { applyPlan = subqueryToApply( subqueryExprs.stream().collect(ImmutableList.toImmutableList()), - leftChildPlan, context.getSubqueryToMarkJoinSlot(), + relatedInfoList.get(i) == RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan, + context.getSubqueryToMarkJoinSlot(), ctx.cascadesContext, Optional.of(conjunct), false); - leftChildPlan = applyPlan; + if (relatedInfoList.get(i) == RelatedInfo.RelatedToLeft) { + leftChildPlan = applyPlan; + } else { + rightChildPlan = applyPlan; + } newConjuncts.add(conjunct); } List<Expression> simpleConjuncts = joinConjuncts.get(false); @@ -217,34 +229,82 @@ public class SubqueryToApply implements AnalysisRuleFactory { newConjuncts.addAll(simpleConjuncts); } Plan newJoin = join.withConjunctsChildren(join.getHashJoinConjuncts(), - newConjuncts.build(), applyPlan, join.right()); + newConjuncts.build(), leftChildPlan, rightChildPlan); return newJoin; })) ); } - private static boolean isValidSubqueryConjunct(Expression expression, Plan leftChild) { - // the subquery must be uncorrelated subquery or only correlated to the left child - // currently only support the following 4 simple scenarios - // 1. col ComparisonPredicate subquery - // 2. col in (subquery) - // 3. exists (subquery) - // 4. col1 ComparisonPredicate subquery or xxx (no more subquery) - List<Slot> slots = leftChild.getOutput(); - if (expression instanceof ComparisonPredicate && expression.child(1) instanceof ScalarSubquery) { - ScalarSubquery subquery = (ScalarSubquery) expression.child(1); - return slots.containsAll(subquery.getCorrelateSlots()); - } else if (expression instanceof InSubquery) { - return slots.containsAll(((InSubquery) expression).getCorrelateSlots()); - } else if (expression instanceof Exists) { - return slots.containsAll(((Exists) expression).getCorrelateSlots()); - } else { + private static boolean isValidSubqueryConjunct(Expression expression) { + // only support 1 subquery expr in the expression + // don't support expression like subquery1 or subquery2 + return expression.collectToList(SubqueryExpr.class::isInstance).size() == 1; + } + + private enum RelatedInfo { + // both subquery and its output don't related to any child. like (select sum(t.a) from t) > 1 + Unrelated, + // either subquery or its output only related to left child. like bellow: + // tableLeft.a in (select t.a from t) + // 3 in (select t.b from t where t.a = tableLeft.a) + // tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b) + RelatedToLeft, + // like above, but related to right child + RelatedToRight, + // subquery related to both left and child is not supported: + // tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b) + UnSupported + } + + private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression> subqueryConjuncts, + Plan leftChild, Plan rightChild) { + int size = subqueryConjuncts.size(); + ImmutableList.Builder<RelatedInfo> correlatedInfoList = new ImmutableList.Builder<>(); + Set<Slot> leftOutputSlots = leftChild.getOutputSet(); + Set<Slot> rightOutputSlots = rightChild.getOutputSet(); + for (int i = 0; i < size; ++i) { + Expression expression = subqueryConjuncts.get(i); List<SubqueryExpr> subqueryExprs = expression.collectToList(SubqueryExpr.class::isInstance); + RelatedInfo relatedInfo = RelatedInfo.UnSupported; if (subqueryExprs.size() == 1) { - return slots.containsAll(subqueryExprs.get(0).getCorrelateSlots()); + SubqueryExpr subqueryExpr = subqueryExprs.get(0); + List<Slot> correlatedSlots = subqueryExpr.getCorrelateSlots(); + if (subqueryExpr instanceof ScalarSubquery) { + Set<Slot> inputSlots = expression.getInputSlots(); + if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) { + relatedInfo = RelatedInfo.Unrelated; + } else if (leftOutputSlots.containsAll(inputSlots) + && leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(inputSlots) + && rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } else if (subqueryExpr instanceof InSubquery) { + InSubquery inSubquery = (InSubquery) subqueryExpr; + Set<Slot> compareSlots = inSubquery.getCompareExpr().getInputSlots(); + if (compareSlots.isEmpty()) { + relatedInfo = RelatedInfo.UnSupported; + } else if (leftOutputSlots.containsAll(compareSlots) + && leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(compareSlots) + && rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } else if (subqueryExpr instanceof Exists) { + if (correlatedSlots.isEmpty()) { + relatedInfo = RelatedInfo.Unrelated; + } else if (leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } } + correlatedInfoList.add(relatedInfo); } - return false; + return correlatedInfoList.build(); } private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPlan childPlan, @@ -270,10 +330,17 @@ public class SubqueryToApply implements AnalysisRuleFactory { private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists, Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) { return exists instanceof Exists - && exists.getQueryPlan() - .anyMatch(planTreeNode -> planTreeNode instanceof LogicalAggregate - && ((LogicalAggregate<?>) planTreeNode).getGroupByExpressions().isEmpty()) - && !subqueryToMarkJoinSlot.get(exists).isPresent(); + && !subqueryToMarkJoinSlot.get(exists).isPresent() + && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()); + } + + private boolean hasTopLevelAggWithoutGroupBy(Plan plan) { + if (plan instanceof LogicalAggregate) { + return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty(); + } else if (plan instanceof LogicalProject || plan instanceof LogicalSort) { + return hasTopLevelAggWithoutGroupBy(plan.child(0)); + } + return false; } private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out index c8051946673..0564d1884d9 100644 --- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out +++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out @@ -486,3 +486,6 @@ true -- !cir_5218_exists_ok_6 -- 0 +-- !doris_7643 -- +3 3 + diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy index 1c2dd55676d..01e347031f8 100644 --- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy +++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy @@ -65,6 +65,62 @@ suite("test_subquery") { select * from nereids_test_query_db.baseall where k1 = (select k1 from nereids_test_query_db.baseall order by k1 desc limit 1) """ + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" + sql """DROP TABLE IF EXISTS table_1000_undef_undef2""" + sql """CREATE TABLE `table_1000_undef_undef` ( + `pk` int(11) NULL, + `col_bigint_undef_signed` bigint(20) NULL, + `col_bigint_undef_signed2` bigint(20) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`pk`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); """ + + sql """ CREATE TABLE `table_1000_undef_undef2` ( + `pk` int(11) NULL, + `col_bigint_undef_signed` bigint(20) NULL, + `col_bigint_undef_signed2` bigint(20) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`pk`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + );""" + explain { + sql """ + SELECT `col_bigint_undef_signed` '00:39:36' , `col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37', `col_bigint_undef_signed2` '19:01:23' + FROM table_1000_undef_undef2 + WHERE EXISTS + (SELECT `col_bigint_undef_signed` '17:38:13' , `col_bigint_undef_signed2` '17:36:21' + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` NOT IN + (SELECT `col_bigint_undef_signed` + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` < + (SELECT AVG(`col_bigint_undef_signed`) + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` < 2)) ) ; + """ + contains("VAGGREGATE") + } + + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" + sql """DROP TABLE IF EXISTS table_1000_undef_undef2""" + sql """drop table if exists test_one_row_relation;""" sql """ CREATE TABLE `test_one_row_relation` ( diff --git a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy index f64cba11097..0e98510e96f 100644 --- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy +++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy @@ -648,6 +648,34 @@ suite ("sub_query_correlated") { exception "Unsupported correlated subquery with grouping and/or aggregation"; } + qt_doris_7643 """ + SELECT sub_query_correlated_subquery6.* + FROM sub_query_correlated_subquery6 + JOIN sub_query_correlated_subquery7 + ON sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery7.k3 + AND EXISTS + (SELECT sub_query_correlated_subquery8.k1 + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery6.k2 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery6.k1 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 + WHERE sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery8.k2 ) + AND sub_query_correlated_subquery7.k3 IN + (SELECT sub_query_correlated_subquery8.k1 + FROM sub_query_correlated_subquery8 ) + AND 10 > + (SELECT min(sub_query_correlated_subquery8.k2) + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery7.k3 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 + WHERE sub_query_correlated_subquery7.v1 = sub_query_correlated_subquery8.k2 ) + ORDER BY sub_query_correlated_subquery6.k1, sub_query_correlated_subquery6.k2; + """ + // order_qt_doris_6937_2 """ // select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2; // """ diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index e6d2906c0d3..15b010fdfae 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") { qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2 LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS NULL) order by 1, 2, 3, 4""" test { - sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;" + sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;" exception "Not support OnClause contain Subquery" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org