This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new f345ccd7710 [fix](nereids)subquery unnest need handle subquery in Not expr correnctly #28713 (#28829) f345ccd7710 is described below commit f345ccd7710869142558aefa92254d1a8bf475cc Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Fri Dec 22 14:22:15 2023 +0800 [fix](nereids)subquery unnest need handle subquery in Not expr correnctly #28713 (#28829) --- .../nereids/rules/analysis/SubqueryToApply.java | 35 ++++++-- .../data/nereids_p0/subquery/test_subquery.out | 4 + .../nereids_p0/subquery/test_subquery.groovy | 93 ++++++++++++++++++++++ 3 files changed, 127 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java index 1b2f7c898bb..9057104599f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.TreeNode; import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.And; import org.apache.doris.nereids.trees.expressions.BinaryOperator; import org.apache.doris.nereids.trees.expressions.Exists; import org.apache.doris.nereids.trees.expressions.Expression; @@ -30,6 +31,7 @@ import org.apache.doris.nereids.trees.expressions.InSubquery; import org.apache.doris.nereids.trees.expressions.ListQuery; import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Or; import org.apache.doris.nereids.trees.expressions.ScalarSubquery; import org.apache.doris.nereids.trees.expressions.Slot; @@ -38,7 +40,6 @@ import org.apache.doris.nereids.trees.expressions.SubqueryExpr; import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; import org.apache.doris.nereids.trees.plans.Plan; -import org.apache.doris.nereids.trees.plans.algebra.Aggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalApply; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; @@ -73,7 +74,8 @@ public class SubqueryToApply implements AnalysisRuleFactory { RuleType.FILTER_SUBQUERY_TO_APPLY.build( logicalFilter().thenApply(ctx -> { LogicalFilter<Plan> filter = ctx.root; - + boolean shouldOutputMarkJoinSlot = filter.getConjuncts().stream() + .anyMatch(expr -> shouldOutputMarkJoinSlot(expr, SearchState.SearchNot)); ImmutableList<Set<SubqueryExpr>> subqueryExprsList = filter.getConjuncts().stream() .<Set<SubqueryExpr>>map(e -> e.collect(SubqueryToApply::canConvertToSupply)) .collect(ImmutableList.toImmutableList()); @@ -98,7 +100,7 @@ public class SubqueryToApply implements AnalysisRuleFactory { // first step: Replace the subquery of predicate in LogicalFilter // second step: Replace subquery with LogicalApply ReplaceSubquery replaceSubquery = new ReplaceSubquery( - ctx.statementContext, false); + ctx.statementContext, shouldOutputMarkJoinSlot); SubqueryContext context = new SubqueryContext(subqueryExprs); Expression conjunct = replaceSubquery.replace(oldConjuncts.get(i), context); @@ -341,7 +343,7 @@ public class SubqueryToApply implements AnalysisRuleFactory { && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()); } - private boolean hasTopLevelAggWithoutGroupBy(Plan plan) { + private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) { if (plan instanceof LogicalAggregate) { return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty(); } else if (plan instanceof LogicalProject || plan instanceof LogicalSort) { @@ -425,7 +427,7 @@ public class SubqueryToApply implements AnalysisRuleFactory { // it will always consider the returned result to be true boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot; MarkJoinSlotReference markJoinSlotReference = null; - if (exists.getQueryPlan().anyMatch(Aggregate.class::isInstance) && needCreateMarkJoinSlot) { + if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && needCreateMarkJoinSlot) { markJoinSlotReference = new MarkJoinSlotReference(statementContext.generateColumnName(), true); } else if (needCreateMarkJoinSlot) { @@ -503,4 +505,27 @@ public class SubqueryToApply implements AnalysisRuleFactory { } + private enum SearchState { + SearchNot, + SearchAnd, + SearchExistsOrInSubquery + } + + private boolean shouldOutputMarkJoinSlot(Expression expr, SearchState searchState) { + if (searchState == SearchState.SearchNot && expr instanceof Not) { + if (shouldOutputMarkJoinSlot(((Not) expr).child(), SearchState.SearchAnd)) { + return true; + } + } else if (searchState == SearchState.SearchAnd && expr instanceof And) { + for (Expression child : expr.children()) { + if (shouldOutputMarkJoinSlot(child, SearchState.SearchExistsOrInSubquery)) { + return true; + } + } + } else if (searchState == SearchState.SearchExistsOrInSubquery + && (expr instanceof InSubquery || expr instanceof Exists)) { + return true; + } + return false; + } } diff --git a/regression-test/data/nereids_p0/subquery/test_subquery.out b/regression-test/data/nereids_p0/subquery/test_subquery.out index 2344cedddd5..004aa0bcad0 100644 --- a/regression-test/data/nereids_p0/subquery/test_subquery.out +++ b/regression-test/data/nereids_p0/subquery/test_subquery.out @@ -26,3 +26,7 @@ true 15 1992 3021 11011920 0.000 true 9999-12-12 2015-04-02T00:00 3.141592653 2 -- !sql_mark_join -- 1 +-- !select_sub -- +1 9 +2 \N + diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy index c522adf43e1..e84ceaa9682 100644 --- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy +++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy @@ -150,4 +150,97 @@ suite("test_subquery") { qt_sql_mark_join """with A as (select count(*) n1 from test_one_row_relation where exists (select 1 from test_one_row_relation t where t.user_id = test_one_row_relation.user_id) or 1 = 1) select * from A;""" sql """drop table if exists test_one_row_relation;""" + + sql """drop table if exists subquery_test_t1;""" + sql """drop table if exists subquery_test_t2;""" + sql """create table subquery_test_t1 ( + id int + ) + UNIQUE KEY (`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1");""" + sql """create table subquery_test_t2 ( + id int + ) + UNIQUE KEY (`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ("replication_allocation" = "tag.location.default: 1");""" + + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + exists(select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 5) + and + exists(select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) + ); """) + contains("isMarkJoin=true") + } + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + subquery_test_t1.id > 10 + and + exists(select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) + );""") + contains("isMarkJoin=true") + } + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + subquery_test_t1.id > 10 + and + subquery_test_t1.id in (select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) + ); """) + contains("isMarkJoin=true") + } + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + subquery_test_t1.id > 10 + and + subquery_test_t1.id in (select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) + ); """) + contains("isMarkJoin=true") + } + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + subquery_test_t1.id > 10 + and + ( subquery_test_t1.id < 100 or subquery_test_t1.id in (select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) ) + ); """) + contains("isMarkJoin=true") + } + explain { + sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1 + where + not ( + subquery_test_t1.id > 10 + and + ( subquery_test_t1.id < 100 or case when subquery_test_t1.id in (select 1 from subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) then 1 else 0 end ) + );""") + contains("isMarkJoin=true") + } + + sql """drop table if exists table_23_undef_undef""" + sql """create table table_23_undef_undef (`pk` int,`col_int_undef_signed` int ,`col_varchar_10__undef_signed` varchar(10) ,`col_varchar_1024__undef_signed` varchar(1024) ) engine=olap distributed by hash(pk) buckets 10 properties( 'replication_num' = '1');""" + sql """drop table if exists table_20_undef_undef""" + sql """create table table_20_undef_undef (`pk` int,`col_int_undef_signed` int ,`col_varchar_10__undef_signed` varchar(10) ,`col_varchar_1024__undef_signed` varchar(1024) ) engine=olap distributed by hash(pk) buckets 10 properties( 'replication_num' = '1');""" + sql """drop table if exists table_9_undef_undef""" + sql """create table table_9_undef_undef (`pk` int,`col_int_undef_signed` int ,`col_varchar_10__undef_signed` varchar(10) ,`col_varchar_1024__undef_signed` varchar(1024) ) engine=olap distributed by hash(pk) buckets 10 properties( 'replication_num' = '1');""" + + sql """insert into table_23_undef_undef values (0,0,'t','p'),(1,6,'q',"really"),(2,3,'p',"of"),(3,null,"he",'k'),(4,8,"this","don't"),(5,6,"see","this"),(6,5,'s','q'),(7,null,'o','j'),(8,9,'l',"could"),(9,null,"one",'l'),(10,7,"can't",'f'),(11,2,"going","not"),(12,null,'g','r'),(13,3,"ok",'s'),(14,6,"she",'k'),(15,null,"she",'p'),(16,8,"what","him"),(17,null,"from","to"),(18,5,"so","up"),(19,null,"my","is"),(20,null,'h',"see"),(21,null,"as","to"),(22,0,"know","the");""" + sql """insert into table_20_undef_undef values (0,null,'r','x'),(1,null,'m',"say"),(2,2,"mean",'h'),(3,null,'n','b'),(4,8,"do","do"),(5,9,'h',"were"),(6,null,"was","one"),(7,2,'o',"she"),(8,0,"who","me"),(9,null,'n',"that"),(10,null,"will",'l'),(11,4,'m',"if"),(12,5,"the","got"),(13,null,"why",'f'),(14,0,"of","for"),(15,null,"or","ok"),(16,null,'c','u'),(17,3,'f','c'),(18,null,"see",'f'),(19,2,'f','z');""" + sql """insert into table_9_undef_undef values (0,3,"his",'g'),(1,8,'p','n'),(2,null,"get","got"),(3,3,'r','r'),(4,null,"or","get"),(5,0,'j',"yeah"),(6,null,'w','x'),(7,8,'q',"for"),(8,3,'p',"that");""" + + qt_select_sub"""SELECT DISTINCT alias1.`pk` AS field1, alias2.`col_int_undef_signed` AS field2 FROM table_23_undef_undef AS alias1, table_20_undef_undef AS alias2 WHERE ( EXISTS ( SELECT DISTINCT SQ1_alias1.`col_varchar_10__undef_signed` AS SQ1_field1 FROM table_9_undef_undef AS SQ1_alias1 WHERE SQ1_alias1.`col_varchar_10__undef_signed` = alias1.`col_varchar_10__undef_signed` ) ) OR alias1.`col_varchar_1024__ [...] + sql """drop table if exists table_23_undef_undef""" + sql """drop table if exists table_20_undef_undef""" + sql """drop table if exists table_9_undef_undef""" + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org