This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new f345ccd7710 [fix](nereids)subquery unnest need handle subquery in Not 
expr correnctly #28713 (#28829)
f345ccd7710 is described below

commit f345ccd7710869142558aefa92254d1a8bf475cc
Author: starocean999 <40539150+starocean...@users.noreply.github.com>
AuthorDate: Fri Dec 22 14:22:15 2023 +0800

    [fix](nereids)subquery unnest need handle subquery in Not expr correnctly 
#28713 (#28829)
---
 .../nereids/rules/analysis/SubqueryToApply.java    | 35 ++++++--
 .../data/nereids_p0/subquery/test_subquery.out     |  4 +
 .../nereids_p0/subquery/test_subquery.groovy       | 93 ++++++++++++++++++++++
 3 files changed, 127 insertions(+), 5 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index 1b2f7c898bb..9057104599f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -23,6 +23,7 @@ import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.TreeNode;
 import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.And;
 import org.apache.doris.nereids.trees.expressions.BinaryOperator;
 import org.apache.doris.nereids.trees.expressions.Exists;
 import org.apache.doris.nereids.trees.expressions.Expression;
@@ -30,6 +31,7 @@ import org.apache.doris.nereids.trees.expressions.InSubquery;
 import org.apache.doris.nereids.trees.expressions.ListQuery;
 import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.ScalarSubquery;
 import org.apache.doris.nereids.trees.expressions.Slot;
@@ -38,7 +40,6 @@ import 
org.apache.doris.nereids.trees.expressions.SubqueryExpr;
 import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
 import 
org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
 import org.apache.doris.nereids.trees.plans.Plan;
-import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
 import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 import org.apache.doris.nereids.trees.plans.logical.LogicalApply;
 import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
@@ -73,7 +74,8 @@ public class SubqueryToApply implements AnalysisRuleFactory {
             RuleType.FILTER_SUBQUERY_TO_APPLY.build(
                 logicalFilter().thenApply(ctx -> {
                     LogicalFilter<Plan> filter = ctx.root;
-
+                    boolean shouldOutputMarkJoinSlot = 
filter.getConjuncts().stream()
+                            .anyMatch(expr -> shouldOutputMarkJoinSlot(expr, 
SearchState.SearchNot));
                     ImmutableList<Set<SubqueryExpr>> subqueryExprsList = 
filter.getConjuncts().stream()
                             .<Set<SubqueryExpr>>map(e -> 
e.collect(SubqueryToApply::canConvertToSupply))
                             .collect(ImmutableList.toImmutableList());
@@ -98,7 +100,7 @@ public class SubqueryToApply implements AnalysisRuleFactory {
                         // first step: Replace the subquery of predicate in 
LogicalFilter
                         // second step: Replace subquery with LogicalApply
                         ReplaceSubquery replaceSubquery = new ReplaceSubquery(
-                                ctx.statementContext, false);
+                                ctx.statementContext, 
shouldOutputMarkJoinSlot);
                         SubqueryContext context = new 
SubqueryContext(subqueryExprs);
                         Expression conjunct = 
replaceSubquery.replace(oldConjuncts.get(i), context);
 
@@ -341,7 +343,7 @@ public class SubqueryToApply implements AnalysisRuleFactory 
{
                 && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
     }
 
-    private boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
+    private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
         if (plan instanceof LogicalAggregate) {
             return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
         } else if (plan instanceof LogicalProject || plan instanceof 
LogicalSort) {
@@ -425,7 +427,7 @@ public class SubqueryToApply implements AnalysisRuleFactory 
{
             // it will always consider the returned result to be true
             boolean needCreateMarkJoinSlot = isMarkJoin || 
shouldOutputMarkJoinSlot;
             MarkJoinSlotReference markJoinSlotReference = null;
-            if (exists.getQueryPlan().anyMatch(Aggregate.class::isInstance) && 
needCreateMarkJoinSlot) {
+            if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && 
needCreateMarkJoinSlot) {
                 markJoinSlotReference =
                         new 
MarkJoinSlotReference(statementContext.generateColumnName(), true);
             } else if (needCreateMarkJoinSlot) {
@@ -503,4 +505,27 @@ public class SubqueryToApply implements 
AnalysisRuleFactory {
 
     }
 
+    private enum SearchState {
+        SearchNot,
+        SearchAnd,
+        SearchExistsOrInSubquery
+    }
+
+    private boolean shouldOutputMarkJoinSlot(Expression expr, SearchState 
searchState) {
+        if (searchState == SearchState.SearchNot && expr instanceof Not) {
+            if (shouldOutputMarkJoinSlot(((Not) expr).child(), 
SearchState.SearchAnd)) {
+                return true;
+            }
+        } else if (searchState == SearchState.SearchAnd && expr instanceof 
And) {
+            for (Expression child : expr.children()) {
+                if (shouldOutputMarkJoinSlot(child, 
SearchState.SearchExistsOrInSubquery)) {
+                    return true;
+                }
+            }
+        } else if (searchState == SearchState.SearchExistsOrInSubquery
+                && (expr instanceof InSubquery || expr instanceof Exists)) {
+            return true;
+        }
+        return false;
+    }
 }
diff --git a/regression-test/data/nereids_p0/subquery/test_subquery.out 
b/regression-test/data/nereids_p0/subquery/test_subquery.out
index 2344cedddd5..004aa0bcad0 100644
--- a/regression-test/data/nereids_p0/subquery/test_subquery.out
+++ b/regression-test/data/nereids_p0/subquery/test_subquery.out
@@ -26,3 +26,7 @@ true  15      1992    3021    11011920        0.000   true    
9999-12-12      2015-04-02T00:00                3.141592653     2
 -- !sql_mark_join --
 1
 
+-- !select_sub --
+1      9
+2      \N
+
diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy 
b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
index c522adf43e1..e84ceaa9682 100644
--- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
+++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy
@@ -150,4 +150,97 @@ suite("test_subquery") {
     qt_sql_mark_join """with A as (select count(*) n1 from 
test_one_row_relation where exists (select 1 from test_one_row_relation t where 
t.user_id = test_one_row_relation.user_id) or 1 = 1) select * from A;"""
 
     sql """drop table if exists test_one_row_relation;"""
+
+    sql """drop table if exists subquery_test_t1;"""
+    sql """drop table if exists subquery_test_t2;"""
+    sql """create table subquery_test_t1 (
+                id int
+            )
+            UNIQUE KEY (`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES ("replication_allocation" = "tag.location.default: 
1");"""
+    sql """create table subquery_test_t2 (
+                id int
+            )
+            UNIQUE KEY (`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES ("replication_allocation" = "tag.location.default: 
1");"""
+
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not (
+                            exists(select 1 from subquery_test_t2 where 
subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 5) 
+                            and
+                            exists(select 1 from subquery_test_t2 where 
subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6)
+                        ); """)
+        contains("isMarkJoin=true")
+    }
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not ( 
+                            subquery_test_t1.id > 10 
+                            and 
+                            exists(select 1 from subquery_test_t2 where 
subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6)
+                        );""")
+        contains("isMarkJoin=true")
+    }
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not ( 
+                            subquery_test_t1.id > 10 
+                            and 
+                            subquery_test_t1.id in (select 1 from 
subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and 
subquery_test_t2.id = 6)
+                        );  """)
+        contains("isMarkJoin=true")
+    }
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not ( 
+                            subquery_test_t1.id > 10 
+                            and 
+                            subquery_test_t1.id in (select 1 from 
subquery_test_t2 where subquery_test_t1.id = subquery_test_t2.id and 
subquery_test_t2.id = 6)
+                        ); """)
+        contains("isMarkJoin=true")
+    }
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not ( 
+                            subquery_test_t1.id > 10 
+                            and 
+                            ( subquery_test_t1.id < 100 or subquery_test_t1.id 
in (select 1 from subquery_test_t2 where subquery_test_t1.id = 
subquery_test_t2.id and subquery_test_t2.id = 6) )
+                        ); """)
+        contains("isMarkJoin=true")
+    }
+    explain {
+        sql("""analyzed plan select subquery_test_t1.id from subquery_test_t1
+                where
+                    not ( 
+                            subquery_test_t1.id > 10 
+                            and 
+                            ( subquery_test_t1.id < 100 or case when 
subquery_test_t1.id in (select 1 from subquery_test_t2 where 
subquery_test_t1.id = subquery_test_t2.id and subquery_test_t2.id = 6) then 1 
else 0 end )
+                        );""")
+        contains("isMarkJoin=true")
+    }
+
+    sql """drop table if exists table_23_undef_undef"""
+    sql """create table table_23_undef_undef (`pk` int,`col_int_undef_signed` 
int  ,`col_varchar_10__undef_signed` varchar(10)  
,`col_varchar_1024__undef_signed` varchar(1024)  ) engine=olap distributed by 
hash(pk) buckets 10 properties(    'replication_num' = '1');"""
+    sql """drop table if exists table_20_undef_undef"""
+    sql """create table table_20_undef_undef (`pk` int,`col_int_undef_signed` 
int  ,`col_varchar_10__undef_signed` varchar(10)  
,`col_varchar_1024__undef_signed` varchar(1024)  ) engine=olap distributed by 
hash(pk) buckets 10 properties(    'replication_num' = '1');"""
+    sql """drop table if exists table_9_undef_undef"""
+    sql """create table table_9_undef_undef (`pk` int,`col_int_undef_signed` 
int  ,`col_varchar_10__undef_signed` varchar(10)  
,`col_varchar_1024__undef_signed` varchar(1024)  ) engine=olap distributed by 
hash(pk) buckets 10 properties(    'replication_num' = '1');"""
+
+    sql """insert into table_23_undef_undef values 
(0,0,'t','p'),(1,6,'q',"really"),(2,3,'p',"of"),(3,null,"he",'k'),(4,8,"this","don't"),(5,6,"see","this"),(6,5,'s','q'),(7,null,'o','j'),(8,9,'l',"could"),(9,null,"one",'l'),(10,7,"can't",'f'),(11,2,"going","not"),(12,null,'g','r'),(13,3,"ok",'s'),(14,6,"she",'k'),(15,null,"she",'p'),(16,8,"what","him"),(17,null,"from","to"),(18,5,"so","up"),(19,null,"my","is"),(20,null,'h',"see"),(21,null,"as","to"),(22,0,"know","the");"""
+    sql """insert into table_20_undef_undef values 
(0,null,'r','x'),(1,null,'m',"say"),(2,2,"mean",'h'),(3,null,'n','b'),(4,8,"do","do"),(5,9,'h',"were"),(6,null,"was","one"),(7,2,'o',"she"),(8,0,"who","me"),(9,null,'n',"that"),(10,null,"will",'l'),(11,4,'m',"if"),(12,5,"the","got"),(13,null,"why",'f'),(14,0,"of","for"),(15,null,"or","ok"),(16,null,'c','u'),(17,3,'f','c'),(18,null,"see",'f'),(19,2,'f','z');"""
+    sql """insert into table_9_undef_undef values 
(0,3,"his",'g'),(1,8,'p','n'),(2,null,"get","got"),(3,3,'r','r'),(4,null,"or","get"),(5,0,'j',"yeah"),(6,null,'w','x'),(7,8,'q',"for"),(8,3,'p',"that");"""
+
+    qt_select_sub"""SELECT DISTINCT alias1.`pk` AS field1,     
alias2.`col_int_undef_signed` AS field2 FROM table_23_undef_undef AS alias1,    
 table_20_undef_undef AS alias2 WHERE (         EXISTS (             SELECT 
DISTINCT SQ1_alias1.`col_varchar_10__undef_signed` AS SQ1_field1             
FROM table_9_undef_undef AS SQ1_alias1             WHERE 
SQ1_alias1.`col_varchar_10__undef_signed` = 
alias1.`col_varchar_10__undef_signed`             )         )     OR 
alias1.`col_varchar_1024__ [...]
+    sql """drop table if exists table_23_undef_undef"""
+    sql """drop table if exists table_20_undef_undef"""
+    sql """drop table if exists table_9_undef_undef"""
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to