This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 0c18819a516 [fix](nereids)unnest in-subquery with agg node in proper
condition #25800 (#26687)
0c18819a516 is described below
commit 0c18819a5160f2ebd57bdde0f36ddc9e2a106e76
Author: starocean999 <[email protected]>
AuthorDate: Thu Nov 9 23:33:57 2023 +0800
[fix](nereids)unnest in-subquery with agg node in proper condition #25800
(#26687)
---
.../nereids/rules/analysis/SubExprAnalyzer.java | 44 ++++++--
.../nereids/rules/analysis/SubqueryToApply.java | 7 +-
.../org/apache/doris/nereids/trees/plans/Plan.java | 7 ++
.../nereids_syntax_p0/sub_query_correlated.out | 21 ++++
.../nereids_syntax_p0/sub_query_correlated.groovy | 116 +++++++++++++++++++++
5 files changed, 182 insertions(+), 13 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
index d64a5516f51..4513d2f7977 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
@@ -31,6 +31,7 @@ import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SubqueryExpr;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import
org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
+import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
@@ -38,6 +39,7 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@@ -89,8 +91,7 @@ class SubExprAnalyzer extends
DefaultExpressionRewriter<CascadesContext> {
AnalyzedResult analyzedResult = analyzeSubquery(expr);
checkOutputColumn(analyzedResult.getLogicalPlan());
- checkHasNotAgg(analyzedResult);
- checkHasGroupBy(analyzedResult);
+ checkNoCorrelatedSlotsUnderAgg(analyzedResult);
checkRootIsLimit(analyzedResult);
return new InSubquery(
@@ -105,7 +106,7 @@ class SubExprAnalyzer extends
DefaultExpressionRewriter<CascadesContext> {
checkOutputColumn(analyzedResult.getLogicalPlan());
checkHasAgg(analyzedResult);
- checkHasGroupBy(analyzedResult);
+ checkHasNoGroupBy(analyzedResult);
return new ScalarSubquery(analyzedResult.getLogicalPlan(),
analyzedResult.getCorrelatedSlots());
}
@@ -135,7 +136,7 @@ class SubExprAnalyzer extends
DefaultExpressionRewriter<CascadesContext> {
}
}
- private void checkHasGroupBy(AnalyzedResult analyzedResult) {
+ private void checkHasNoGroupBy(AnalyzedResult analyzedResult) {
if (!analyzedResult.isCorrelated()) {
return;
}
@@ -145,13 +146,11 @@ class SubExprAnalyzer extends
DefaultExpressionRewriter<CascadesContext> {
}
}
- private void checkHasNotAgg(AnalyzedResult analyzedResult) {
- if (!analyzedResult.isCorrelated()) {
- return;
- }
- if (analyzedResult.hasAgg()) {
- throw new AnalysisException("Unsupported correlated subquery with
grouping and/or aggregation "
- + analyzedResult.getLogicalPlan());
+ private void checkNoCorrelatedSlotsUnderAgg(AnalyzedResult analyzedResult)
{
+ if (analyzedResult.hasCorrelatedSlotsUnderAgg()) {
+ throw new AnalysisException(
+ "Unsupported correlated subquery with grouping and/or
aggregation "
+ + analyzedResult.getLogicalPlan());
}
}
@@ -223,6 +222,29 @@ class SubExprAnalyzer extends
DefaultExpressionRewriter<CascadesContext> {
return false;
}
+ public boolean hasCorrelatedSlotsUnderAgg() {
+ return correlatedSlots.isEmpty() ? false
+ : findAggContainsCorrelatedSlots(logicalPlan,
ImmutableSet.copyOf(correlatedSlots));
+ }
+
+ private boolean findAggContainsCorrelatedSlots(Plan rootPlan,
ImmutableSet<Slot> slots) {
+ ArrayDeque<Plan> planQueue = new ArrayDeque<>();
+ planQueue.add(rootPlan);
+ while (!planQueue.isEmpty()) {
+ Plan plan = planQueue.poll();
+ if (plan instanceof LogicalAggregate) {
+ if (plan.containsSlots(slots)) {
+ return true;
+ }
+ } else {
+ for (Plan child : plan.children()) {
+ planQueue.add(child);
+ }
+ }
+ }
+ return false;
+ }
+
public boolean rootIsLimit() {
return logicalPlan instanceof LogicalLimit;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 7ea406677ed..4c6a8fdb8ad 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -38,6 +38,7 @@ import
org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import
org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalApply;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
@@ -269,8 +270,10 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>>
subqueryToMarkJoinSlot) {
return exists instanceof Exists
- && exists.getQueryPlan().anyMatch(Aggregate.class::isInstance)
- && !subqueryToMarkJoinSlot.get(exists).isPresent();
+ && exists.getQueryPlan()
+ .anyMatch(planTreeNode -> planTreeNode instanceof
LogicalAggregate
+ && ((LogicalAggregate<?>)
planTreeNode).getGroupByExpressions().isEmpty())
+ && !subqueryToMarkJoinSlot.get(exists).isPresent();
}
private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java
index 2f21b824554..c23c4b0536d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java
@@ -30,6 +30,7 @@ import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
import java.util.List;
import java.util.Optional;
@@ -64,6 +65,12 @@ public interface Plan extends TreeNode<Plan> {
return getExpressions().stream().anyMatch(Expression::hasUnbound);
}
+ default boolean containsSlots(ImmutableSet<Slot> slots) {
+ return getExpressions().stream().anyMatch(
+ expression -> !Sets.intersection(slots,
expression.getInputSlots()).isEmpty()
+ || children().stream().anyMatch(plan ->
plan.containsSlots(slots)));
+ }
+
default LogicalProperties computeLogicalProperties() {
throw new IllegalStateException("Not support compute logical
properties for " + getClass().getName());
}
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index 647babc200d..c8051946673 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -465,3 +465,24 @@ true
\N
\N
+-- !cir_5218_in_ok --
+4
+
+-- !cir_5218_exists_ok_1 --
+13
+
+-- !cir_5218_exists_ok_2 --
+3
+
+-- !cir_5218_exists_ok_3 --
+5
+
+-- !cir_5218_exists_ok_4 --
+13
+
+-- !cir_5218_exists_ok_5 --
+13
+
+-- !cir_5218_exists_ok_6 --
+0
+
diff --git
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index 6664ad0c6c7..f64cba11097 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -58,6 +58,10 @@ suite ("sub_query_correlated") {
DROP TABLE IF EXISTS `sub_query_correlated_subquery9`
"""
+ sql """
+ DROP TABLE IF EXISTS `sub_query_correlated_subquery10`
+ """
+
sql """
create table if not exists sub_query_correlated_subquery1
(k1 bigint, k2 bigint)
@@ -128,6 +132,13 @@ suite ("sub_query_correlated") {
properties('replication_num' = '1');
"""
+ sql """
+ create table if not exists sub_query_correlated_subquery10
+ (k1 int, k2 varchar(128), k3 bigint, v1 bigint, v2 bigint)
+ distributed by hash(k2) buckets 1
+ properties('replication_num' = '1');
+ """
+
sql """
insert into sub_query_correlated_subquery1 values (1,2), (1,3), (2,4),
(2,5), (3,3), (3,4), (20,2), (22,3), (24,4)
"""
@@ -532,6 +543,111 @@ suite ("sub_query_correlated") {
select sub_query_correlated_subquery8.k1 in (select
sub_query_correlated_subquery9.k3 from sub_query_correlated_subquery9) from
sub_query_correlated_subquery8 order by k1, k2;
"""
+ qt_cir_5218_in_ok """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE k1 IN
+ (SELECT k1
+ FROM
+ (SELECT k1,
+ sum(k3) AS bbb,
+ count(k2) AS aaa
+ FROM sub_query_correlated_subquery7
+ WHERE k1 > 0
+ AND k3 > 0
+ GROUP BY k1 ) y
+ WHERE y.aaa>0
+ AND k1>1);
+ """
+
+ qt_cir_5218_exists_ok_1 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT k1
+ FROM
+ (SELECT k1,
+ sum(k3) AS bbb,
+ count(k2) AS aaa
+ FROM sub_query_correlated_subquery7
+ WHERE k1 > 0
+ AND k3 > 0
+ GROUP BY k1 ) y
+ WHERE y.aaa>0
+ AND k1>1);
+ """
+
+ qt_cir_5218_exists_ok_2 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT k1
+ FROM
+ (SELECT k1
+ FROM sub_query_correlated_subquery7
+ WHERE sub_query_correlated_subquery6.k1 > 7
+ GROUP BY k1 ) y);
+ """
+
+ qt_cir_5218_exists_ok_3 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT k1
+ FROM
+ (SELECT k1
+ FROM sub_query_correlated_subquery7
+ WHERE sub_query_correlated_subquery6.k1 >
sub_query_correlated_subquery7.k3
+ GROUP BY k1 ) y);
+ """
+
+ qt_cir_5218_exists_ok_4 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT sum(k3)
+ FROM
+ sub_query_correlated_subquery7
+ WHERE sub_query_correlated_subquery6.k1 >
sub_query_correlated_subquery7.k3);
+ """
+
+ qt_cir_5218_exists_ok_5 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT sum(k3)
+ FROM
+ sub_query_correlated_subquery10);
+ """
+
+ qt_cir_5218_exists_ok_6 """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE exists
+ (SELECT sum(k3)
+ FROM
+ sub_query_correlated_subquery10 group by k2);
+ """
+
+ test {
+ sql """
+ SELECT count(*)
+ FROM sub_query_correlated_subquery6
+ WHERE k1 IN
+ (SELECT k1
+ FROM
+ (SELECT k1,
+ sum(k3) AS bbb,
+ count(k2) AS aaa
+ FROM sub_query_correlated_subquery7
+ WHERE k1 > 0
+ AND k3 > 0 and
sub_query_correlated_subquery6.k1 > 2
+ GROUP BY k1 ) y
+ WHERE y.aaa>0
+ AND k1>1); """
+ exception "Unsupported correlated subquery with grouping and/or
aggregation";
+ }
+
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where
sub_query_correlated_subquery1.k1 not in (select
sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where
sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 <
10 order by k1, k2;
// """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]