morrySnow commented on code in PR #39471:
URL: https://github.com/apache/doris/pull/39471#discussion_r1723018364


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java:
##########
@@ -33,8 +37,10 @@
  */
 public class ScalarSubquery extends SubqueryExpr {
 
+    private final boolean hasTopLevelScalarAgg;
+
     public ScalarSubquery(LogicalPlan subquery) {
-        super(Objects.requireNonNull(subquery, "subquery can not be null"));
+        this(Objects.requireNonNull(subquery, "subquery can not be null"), 
ImmutableList.of());

Review Comment:
   ```suggestion
           this(subquery, ImmutableList.of());
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java:
##########
@@ -411,6 +414,14 @@ private <P extends Plan> P pruneChildren(P plan, Set<Slot> 
parentRequiredSlots)
                     childRequiredSlotBuilder.add(childOutput);
                 }
             }
+            if (child instanceof LogicalProject) {
+                // keep NoneMovableFunction for later use
+                for (NamedExpression output : ((LogicalProject<?>) 
child).getOutputs()) {
+                    if (output.containsType(NoneMovableFunction.class)) {
+                        childRequiredSlotBuilder.add(output.toSlot());
+                    }
+                }
+            }

Review Comment:
   i think u should change this interface 
`org.apache.doris.nereids.trees.plans.logical.LogicalProject#pruneOutputs`



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpProjectUnderApply.java:
##########
@@ -61,9 +59,9 @@ public Rule build() {
                     Plan newCorrelate = apply.withChildren(apply.left(), 
project.child());
                     List<NamedExpression> newProjects = new 
ArrayList<>(apply.left().getOutput());
                     if (apply.getSubqueryExpr() instanceof ScalarSubquery) {
-                        Preconditions.checkState(project.getProjects().size() 
== 1,
-                                "ScalarSubquery should only have one output 
column");
-                        newProjects.add(project.getProjects().get(0));
+                        // unnest correlated scalar subquery may add count(*) 
and any_value() to project list
+                        // then there may be more than one expr, so we add all 
project exprs here

Review Comment:
   should we do more check here?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java:
##########
@@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) {
         return false;
     }
 
-    private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
-                                 Map<SubqueryExpr, 
Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
-                                 CascadesContext ctx, Optional<Expression> 
conjunct,
-                                 boolean isProject, boolean singleSubquery, 
boolean isMarkJoinSlotNotNull) {
+    private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr 
subquery,

Review Comment:
   add fe ut for this function



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java:
##########
@@ -47,6 +53,33 @@ public ScalarSubquery(LogicalPlan subquery, List<Slot> 
correlateSlots, Optional<
         super(Objects.requireNonNull(subquery, "subquery can not be null"),
                 Objects.requireNonNull(correlateSlots, "correlateSlots can not 
be null"),
                 typeCoercionExpr);
+        hasTopLevelScalarAgg = findTopLevelScalarAgg(subquery);
+    }
+
+    // check if the query has top level scalar agg
+    // if the correlated subquery doesn't have top level scalar agg
+    // we need create one in subquery unnesting step
+    private static boolean findTopLevelScalarAgg(Plan plan) {
+        if (plan instanceof LogicalAggregate) {
+            if (((LogicalAggregate<?>) 
plan).getGroupByExpressions().isEmpty()) {
+                return true;
+            } else {
+                return false;
+            }
+        } else if (plan instanceof LogicalJoin) {

Review Comment:
   why only join return false directly? could u add some comment to explain it? 
what about other operator, such as sort, limit, window or generate?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java:
##########
@@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) {
         return false;
     }
 
-    private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
-                                 Map<SubqueryExpr, 
Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
-                                 CascadesContext ctx, Optional<Expression> 
conjunct,
-                                 boolean isProject, boolean singleSubquery, 
boolean isMarkJoinSlotNotNull) {
+    private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr 
subquery,
+            LogicalPlan childPlan,
+            Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot,
+            CascadesContext ctx, Optional<Expression> conjunct, boolean 
isProject,
+            boolean singleSubquery, boolean isMarkJoinSlotNotNull) {
         ctx.setSubqueryExprIsAnalyzed(subquery, true);
+        Optional<MarkJoinSlotReference> markJoinSlot = 
subqueryToMarkJoinSlot.get(subquery);
         boolean needAddScalarSubqueryOutputToProjects = 
isConjunctContainsScalarSubqueryOutput(
                 subquery, conjunct, isProject, singleSubquery);
+        boolean useNewSubquery = false;

Review Comment:
   ```suggestion
           boolean needRuntimeAssertCount = false;
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java:
##########
@@ -47,6 +53,33 @@ public ScalarSubquery(LogicalPlan subquery, List<Slot> 
correlateSlots, Optional<
         super(Objects.requireNonNull(subquery, "subquery can not be null"),
                 Objects.requireNonNull(correlateSlots, "correlateSlots can not 
be null"),
                 typeCoercionExpr);
+        hasTopLevelScalarAgg = findTopLevelScalarAgg(subquery);
+    }
+
+    // check if the query has top level scalar agg
+    // if the correlated subquery doesn't have top level scalar agg
+    // we need create one in subquery unnesting step
+    private static boolean findTopLevelScalarAgg(Plan plan) {

Review Comment:
   add fe ut for this static function



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java:
##########
@@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) {
         return false;
     }
 
-    private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
-                                 Map<SubqueryExpr, 
Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
-                                 CascadesContext ctx, Optional<Expression> 
conjunct,
-                                 boolean isProject, boolean singleSubquery, 
boolean isMarkJoinSlotNotNull) {
+    private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr 
subquery,
+            LogicalPlan childPlan,
+            Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot,
+            CascadesContext ctx, Optional<Expression> conjunct, boolean 
isProject,
+            boolean singleSubquery, boolean isMarkJoinSlotNotNull) {
         ctx.setSubqueryExprIsAnalyzed(subquery, true);
+        Optional<MarkJoinSlotReference> markJoinSlot = 
subqueryToMarkJoinSlot.get(subquery);
         boolean needAddScalarSubqueryOutputToProjects = 
isConjunctContainsScalarSubqueryOutput(
                 subquery, conjunct, isProject, singleSubquery);
+        boolean useNewSubquery = false;
+        NamedExpression oldSubqueryOutput = 
subquery.getQueryPlan().getOutput().get(0);
+        Slot countSlot = null;
+        Slot anyValueSlot = null;
+        Optional<Expression> newConjunct = conjunct;
+        if (needAddScalarSubqueryOutputToProjects && subquery instanceof 
ScalarSubquery
+                && !subquery.getCorrelateSlots().isEmpty()
+                && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) {
+            // if scalar subquery doesn't have top level scalar agg we will 
create one, for example
+            // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1;
+            // the original output of the correlate subquery is t2.c1, after 
adding a scalar agg, it will be
+            // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = 
t1.c2) from t1;
+            Alias countAlias = new Alias(new Count());
+            Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput));
+            LogicalAggregate<Plan> aggregate = new 
LogicalAggregate<>(ImmutableList.of(),
+                    ImmutableList.of(countAlias, anyValueAlias), 
subquery.getQueryPlan());
+            countSlot = countAlias.toSlot();
+            anyValueSlot = anyValueAlias.toSlot();
+            subquery = subquery.withSubquery(aggregate);
+            if (conjunct.isPresent()) {
+                Map<Expression, Expression> replaceMap = new HashMap<>();
+                replaceMap.put(oldSubqueryOutput, anyValueSlot);
+                newConjunct = 
Optional.of(ExpressionUtils.replace(conjunct.get(), replaceMap));
+            }
+            useNewSubquery = true;
+        }
         LogicalApply newApply = new LogicalApply(
                 subquery.getCorrelateSlots(),
                 subquery, Optional.empty(),
-                subqueryToMarkJoinSlot.get(subquery),
+                markJoinSlot,
                 needAddScalarSubqueryOutputToProjects, isProject, 
isMarkJoinSlotNotNull,
                 childPlan, subquery.getQueryPlan());
 
-        List<NamedExpression> projects = 
ImmutableList.<NamedExpression>builder()
-                    // left child
-                    .addAll(childPlan.getOutput())
-                    // markJoinSlotReference
-                    .addAll(subqueryToMarkJoinSlot.get(subquery).isPresent()
-                        ? 
ImmutableList.of(subqueryToMarkJoinSlot.get(subquery).get()) : 
ImmutableList.of())
-                    // scalarSubquery output
-                    .addAll(needAddScalarSubqueryOutputToProjects
-                        ? 
ImmutableList.of(subquery.getQueryPlan().getOutput().get(0)) : 
ImmutableList.of())
-                    .build();
-
-        return new LogicalProject(projects, newApply);
+        ImmutableList.Builder<NamedExpression> projects = 
ImmutableList.builder();

Review Comment:
   ```suggestion
           ImmutableList.Builder<NamedExpression> projects = 
ImmutableList.builderWithExpectedSize(childPlan.getOutput().size() + 3);
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java:
##########
@@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) {
         return false;
     }
 
-    private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
-                                 Map<SubqueryExpr, 
Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
-                                 CascadesContext ctx, Optional<Expression> 
conjunct,
-                                 boolean isProject, boolean singleSubquery, 
boolean isMarkJoinSlotNotNull) {
+    private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr 
subquery,
+            LogicalPlan childPlan,
+            Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot,
+            CascadesContext ctx, Optional<Expression> conjunct, boolean 
isProject,
+            boolean singleSubquery, boolean isMarkJoinSlotNotNull) {
         ctx.setSubqueryExprIsAnalyzed(subquery, true);
+        Optional<MarkJoinSlotReference> markJoinSlot = 
subqueryToMarkJoinSlot.get(subquery);
         boolean needAddScalarSubqueryOutputToProjects = 
isConjunctContainsScalarSubqueryOutput(
                 subquery, conjunct, isProject, singleSubquery);
+        boolean useNewSubquery = false;
+        NamedExpression oldSubqueryOutput = 
subquery.getQueryPlan().getOutput().get(0);
+        Slot countSlot = null;
+        Slot anyValueSlot = null;
+        Optional<Expression> newConjunct = conjunct;
+        if (needAddScalarSubqueryOutputToProjects && subquery instanceof 
ScalarSubquery
+                && !subquery.getCorrelateSlots().isEmpty()
+                && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) {
+            // if scalar subquery doesn't have top level scalar agg we will 
create one, for example
+            // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1;
+            // the original output of the correlate subquery is t2.c1, after 
adding a scalar agg, it will be
+            // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = 
t1.c2) from t1;
+            Alias countAlias = new Alias(new Count());
+            Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput));
+            LogicalAggregate<Plan> aggregate = new 
LogicalAggregate<>(ImmutableList.of(),
+                    ImmutableList.of(countAlias, anyValueAlias), 
subquery.getQueryPlan());
+            countSlot = countAlias.toSlot();
+            anyValueSlot = anyValueAlias.toSlot();
+            subquery = subquery.withSubquery(aggregate);
+            if (conjunct.isPresent()) {
+                Map<Expression, Expression> replaceMap = new HashMap<>();
+                replaceMap.put(oldSubqueryOutput, anyValueSlot);
+                newConjunct = 
Optional.of(ExpressionUtils.replace(conjunct.get(), replaceMap));
+            }
+            useNewSubquery = true;
+        }
         LogicalApply newApply = new LogicalApply(
                 subquery.getCorrelateSlots(),
                 subquery, Optional.empty(),
-                subqueryToMarkJoinSlot.get(subquery),
+                markJoinSlot,
                 needAddScalarSubqueryOutputToProjects, isProject, 
isMarkJoinSlotNotNull,
                 childPlan, subquery.getQueryPlan());
 
-        List<NamedExpression> projects = 
ImmutableList.<NamedExpression>builder()
-                    // left child
-                    .addAll(childPlan.getOutput())
-                    // markJoinSlotReference
-                    .addAll(subqueryToMarkJoinSlot.get(subquery).isPresent()
-                        ? 
ImmutableList.of(subqueryToMarkJoinSlot.get(subquery).get()) : 
ImmutableList.of())
-                    // scalarSubquery output
-                    .addAll(needAddScalarSubqueryOutputToProjects
-                        ? 
ImmutableList.of(subquery.getQueryPlan().getOutput().get(0)) : 
ImmutableList.of())
-                    .build();
-
-        return new LogicalProject(projects, newApply);
+        ImmutableList.Builder<NamedExpression> projects = 
ImmutableList.builder();
+        // left child
+        projects.addAll(childPlan.getOutput());
+        // markJoinSlotReference
+        projects.addAll(markJoinSlot.isPresent() ? 
ImmutableList.of(markJoinSlot.get()) : ImmutableList.of());

Review Comment:
   ```suggestion
           markJoinSlot.map(projects::add);
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java:
##########
@@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) {
         return false;
     }
 
-    private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan,
-                                 Map<SubqueryExpr, 
Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
-                                 CascadesContext ctx, Optional<Expression> 
conjunct,
-                                 boolean isProject, boolean singleSubquery, 
boolean isMarkJoinSlotNotNull) {
+    private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr 
subquery,
+            LogicalPlan childPlan,
+            Map<SubqueryExpr, Optional<MarkJoinSlotReference>> 
subqueryToMarkJoinSlot,
+            CascadesContext ctx, Optional<Expression> conjunct, boolean 
isProject,
+            boolean singleSubquery, boolean isMarkJoinSlotNotNull) {
         ctx.setSubqueryExprIsAnalyzed(subquery, true);
+        Optional<MarkJoinSlotReference> markJoinSlot = 
subqueryToMarkJoinSlot.get(subquery);
         boolean needAddScalarSubqueryOutputToProjects = 
isConjunctContainsScalarSubqueryOutput(
                 subquery, conjunct, isProject, singleSubquery);
+        boolean useNewSubquery = false;
+        NamedExpression oldSubqueryOutput = 
subquery.getQueryPlan().getOutput().get(0);
+        Slot countSlot = null;
+        Slot anyValueSlot = null;
+        Optional<Expression> newConjunct = conjunct;
+        if (needAddScalarSubqueryOutputToProjects && subquery instanceof 
ScalarSubquery
+                && !subquery.getCorrelateSlots().isEmpty()
+                && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) {
+            // if scalar subquery doesn't have top level scalar agg we will 
create one, for example
+            // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1;
+            // the original output of the correlate subquery is t2.c1, after 
adding a scalar agg, it will be
+            // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = 
t1.c2) from t1;
+            Alias countAlias = new Alias(new Count());
+            Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput));
+            LogicalAggregate<Plan> aggregate = new 
LogicalAggregate<>(ImmutableList.of(),
+                    ImmutableList.of(countAlias, anyValueAlias), 
subquery.getQueryPlan());

Review Comment:
   group by will be add later?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to