This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 98106ad60f17d8c6838864f516d3dda1653edf65 Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Wed Feb 21 15:12:59 2024 +0800 [fix](nereids) fix bug of unnest subuqery with having clause (#31152) 1. run PushDownFilterThroughAggregation, PushDownFilterThroughProject and MergeFilters before subquery unnesting 2. should keep plan unchanged even left semi join's condition is true (because the right table may be empty()) 3. PushDownFilterThroughProject need check if filter's input slots are all coming from project's output --- .../doris/nereids/jobs/executor/Rewriter.java | 47 ++++++++++++- .../nereids/rules/rewrite/EliminateSemiJoin.java | 6 +- .../rewrite/PushDownFilterThroughProject.java | 77 +++++++++++++++++----- .../rules/rewrite/EliminateSemiJoinTest.java | 2 +- .../nereids_p0/subquery/subquery_unnesting.out | 4 ++ .../eliminate_join_condition.out | 4 +- .../nereids_p0/subquery/subquery_unnesting.groovy | 2 + 7 files changed, 121 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 8f60fbddfdf..e2d386dc910 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -102,6 +102,7 @@ import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoOdbcScan; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin; +import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughAggregation; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushDownLimit; import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin; @@ -167,7 +168,51 @@ public class Rewriter extends AbstractBatchJobExecutor { // after doing NormalizeAggregate in analysis job // we need run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work bottomUp(new PullUpProjectUnderApply()), - topDown(new PushDownFilterThroughProject()), + topDown( + /* + * for subquery unnest, we need hand sql like + * + * SELECT * + * FROM table1 AS t1 + * WHERE EXISTS + * (SELECT `pk` + * FROM table2 AS t2 + * WHERE t1.pk = t2 .pk + * GROUP BY t2.pk + * HAVING t2.pk > 0) ; + * + * before: + * apply + * / \ + * child Filter(t2.pk > 0) + * | + * Project(t2.pk) + * | + * agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk) + * | + * child + * + * after: + * apply + * / \ + * child agg + * | + * Project(t2.pk) + * | + * Filter(t1.pk=t2.pk and t2.pk >0) + * | + * child + * + * then PullUpCorrelatedFilterUnderApplyAggregateProject rule can match the node pattern + */ + new PushDownFilterThroughAggregation(), + new PushDownFilterThroughProject(), + new MergeFilters() + ), custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION, AggScalarSubQueryToWindowFunction::new), bottomUp( diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java index f514a1d14e9..808dcd7b185 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoin.java @@ -57,8 +57,10 @@ public class EliminateSemiJoin extends OneRewriteRuleFactory { } else { return null; } - if (joinType == JoinType.LEFT_SEMI_JOIN && condition - || (joinType == JoinType.LEFT_ANTI_JOIN && !condition)) { + if (joinType == JoinType.LEFT_SEMI_JOIN && condition) { + // the right table may be empty, we need keep plan unchanged + return null; + } else if (joinType == JoinType.LEFT_ANTI_JOIN && !condition) { return join.left(); } else if (joinType == JoinType.LEFT_SEMI_JOIN && !condition || (joinType == JoinType.LEFT_ANTI_JOIN && condition)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index c0fe79fe01d..77c90820a25 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -17,18 +17,24 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.PlanUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; import java.util.List; +import java.util.Set; /** * Push down filter through project. @@ -37,6 +43,7 @@ import java.util.List; * output: * project(c+d as a, e as b) -> filter(c+d>2, e=0). */ + public class PushDownFilterThroughProject implements RewriteRuleFactory { @Override public List<Rule> buildRules() { @@ -53,27 +60,65 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { .whenNot(filter -> filter.child().child().getProjects().stream() .anyMatch(expr -> expr.anyMatch(WindowExpression.class::isInstance))) .whenNot(filter -> filter.child().child().hasPushedDownToProjectionFunctions()) - .then(filter -> { - LogicalLimit<LogicalProject<Plan>> limit = filter.child(); - LogicalProject<Plan> project = limit.child(); - - return project.withProjectsAndChild(project.getProjects(), - new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), - project.getAliasToProducer()), - limit.withChildren(project.child()))); - }).toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) + .then(PushDownFilterThroughProject::pushdownFilterThroughLimitProject) + .toRule(RuleType.PUSH_DOWN_FILTER_THROUGH_PROJECT_UNDER_LIMIT) ); } /** pushdown Filter through project */ - public static Plan pushdownFilterThroughProject(LogicalFilter<LogicalProject<Plan>> filter) { + private static Plan pushdownFilterThroughProject(LogicalFilter<LogicalProject<Plan>> filter) { LogicalProject<Plan> project = filter.child(); - return project.withChildren( + Set<Slot> childOutputs = project.getOutputSet(); + // we need run this rule before subquey unnesting + // therefore the conjuncts may contain slots from outer query + // we should only push down conjuncts without any outer query's slot + // so we split the conjuncts into two parts: + // splitConjuncts.first -> conjuncts having outer query slots which should NOT be pushed down + // splitConjuncts.second -> conjuncts without any outer query slots which should be pushed down + Pair<Set<Expression>, Set<Expression>> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + // all conjuncts contain outer query's slots, no conjunct can be pushed down + // just return unchanged plan + return null; + } + project = (LogicalProject<Plan>) project.withChildren(new LogicalFilter<>( + ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()), + project.child())); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Plan pushdownFilterThroughLimitProject( + LogicalFilter<LogicalLimit<LogicalProject<Plan>>> filter) { + LogicalLimit<LogicalProject<Plan>> limit = filter.child(); + LogicalProject<Plan> project = limit.child(); + Set<Slot> childOutputs = project.getOutputSet(); + // split the conjuncts by child's output + Pair<Set<Expression>, Set<Expression>> splitConjuncts = + splitConjunctsByChildOutput(filter.getConjuncts(), childOutputs); + if (splitConjuncts.second.isEmpty()) { + return null; + } + project = project.withProjectsAndChild(project.getProjects(), new LogicalFilter<>( - ExpressionUtils.replace(filter.getConjuncts(), project.getAliasToProducer()), - project.child() - ) - ); + ExpressionUtils.replace(splitConjuncts.second, + project.getAliasToProducer()), + limit.withChildren(project.child()))); + return PlanUtils.filterOrSelf(splitConjuncts.first, project); + } + + private static Pair<Set<Expression>, Set<Expression>> splitConjunctsByChildOutput( + Set<Expression> conjuncts, Set<Slot> childOutputs) { + Set<Expression> pushDownPredicates = Sets.newLinkedHashSet(); + Set<Expression> remainPredicates = Sets.newLinkedHashSet(); + conjuncts.forEach(conjunct -> { + Set<Slot> conjunctSlots = conjunct.getInputSlots(); + if (childOutputs.containsAll(conjunctSlots)) { + pushDownPredicates.add(conjunct); + } else { + remainPredicates.add(conjunct); + } + }); + return Pair.of(remainPredicates, pushDownPredicates); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java index 69c1f41d90b..61218ae6dc2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateSemiJoinTest.java @@ -50,7 +50,7 @@ class EliminateSemiJoinTest extends TestWithFeService implements MemoPatternMatc .rewrite() .matches( logicalResultSink( - logicalOlapScan() + logicalProject(logicalJoin()) ) ); } diff --git a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out index df30f668188..e262f19b212 100644 --- a/regression-test/data/nereids_p0/subquery/subquery_unnesting.out +++ b/regression-test/data/nereids_p0/subquery/subquery_unnesting.out @@ -1504,3 +1504,7 @@ 22 3 24 4 +-- !select61 -- + +-- !select62 -- + diff --git a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out index 4c765dcf37f..571a81995eb 100644 --- a/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out +++ b/regression-test/data/nereids_rules_p0/eliminate_join_condition/eliminate_join_condition.out @@ -25,7 +25,9 @@ PhysicalResultSink -- !left_semi_join -- PhysicalResultSink ---PhysicalOlapScan[t] +--NestedLoopJoin[LEFT_SEMI_JOIN] +----PhysicalOlapScan[t] +----PhysicalOlapScan[t] -- !left_anti_join -- PhysicalResultSink diff --git a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy index 2bae4cc7bb1..174a72df621 100644 --- a/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy +++ b/regression-test/suites/nereids_p0/subquery/subquery_unnesting.groovy @@ -130,4 +130,6 @@ suite ("subquery_unnesting") { qt_select58 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) or t1.k1 < 10 order by t1.k1, t1.k2;""" qt_select59 """select t1.* from t1 left join t2 on t1.k2 = t2.k3 and not exists ( select t3.k1 from t3 ) order by t1.k1, t1.k2;""" qt_select60 """select * from t1 where exists(select distinct k1 from t2 where t1.k1 > t2.k3 or t1.k2 < t2.v1) order by t1.k1, t1.k2;""" + qt_select61 """SELECT * FROM t1 AS t1 WHERE EXISTS (SELECT k1 FROM t1 AS t2 WHERE t1.k1 <> t2.k1 + 7 GROUP BY k1 HAVING k1 >= 100);""" + qt_select62 """select * from t1 left semi join ( select * from t1 where t1.k1 < -1 ) l on true;""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org