morrySnow commented on code in PR #39471: URL: https://github.com/apache/doris/pull/39471#discussion_r1723018364
########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java: ########## @@ -33,8 +37,10 @@ */ public class ScalarSubquery extends SubqueryExpr { + private final boolean hasTopLevelScalarAgg; + public ScalarSubquery(LogicalPlan subquery) { - super(Objects.requireNonNull(subquery, "subquery can not be null")); + this(Objects.requireNonNull(subquery, "subquery can not be null"), ImmutableList.of()); Review Comment: ```suggestion this(subquery, ImmutableList.of()); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java: ########## @@ -411,6 +414,14 @@ private <P extends Plan> P pruneChildren(P plan, Set<Slot> parentRequiredSlots) childRequiredSlotBuilder.add(childOutput); } } + if (child instanceof LogicalProject) { + // keep NoneMovableFunction for later use + for (NamedExpression output : ((LogicalProject<?>) child).getOutputs()) { + if (output.containsType(NoneMovableFunction.class)) { + childRequiredSlotBuilder.add(output.toSlot()); + } + } + } Review Comment: i think u should change this interface `org.apache.doris.nereids.trees.plans.logical.LogicalProject#pruneOutputs` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpProjectUnderApply.java: ########## @@ -61,9 +59,9 @@ public Rule build() { Plan newCorrelate = apply.withChildren(apply.left(), project.child()); List<NamedExpression> newProjects = new ArrayList<>(apply.left().getOutput()); if (apply.getSubqueryExpr() instanceof ScalarSubquery) { - Preconditions.checkState(project.getProjects().size() == 1, - "ScalarSubquery should only have one output column"); - newProjects.add(project.getProjects().get(0)); + // unnest correlated scalar subquery may add count(*) and any_value() to project list + // then there may be more than one expr, so we add all project exprs here Review Comment: should we do more check here? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) { return false; } - private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, - Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, - CascadesContext ctx, Optional<Expression> conjunct, - boolean isProject, boolean singleSubquery, boolean isMarkJoinSlotNotNull) { + private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr subquery, Review Comment: add fe ut for this function ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java: ########## @@ -47,6 +53,33 @@ public ScalarSubquery(LogicalPlan subquery, List<Slot> correlateSlots, Optional< super(Objects.requireNonNull(subquery, "subquery can not be null"), Objects.requireNonNull(correlateSlots, "correlateSlots can not be null"), typeCoercionExpr); + hasTopLevelScalarAgg = findTopLevelScalarAgg(subquery); + } + + // check if the query has top level scalar agg + // if the correlated subquery doesn't have top level scalar agg + // we need create one in subquery unnesting step + private static boolean findTopLevelScalarAgg(Plan plan) { + if (plan instanceof LogicalAggregate) { + if (((LogicalAggregate<?>) plan).getGroupByExpressions().isEmpty()) { + return true; + } else { + return false; + } + } else if (plan instanceof LogicalJoin) { Review Comment: why only join return false directly? could u add some comment to explain it? what about other operator, such as sort, limit, window or generate? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) { return false; } - private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, - Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, - CascadesContext ctx, Optional<Expression> conjunct, - boolean isProject, boolean singleSubquery, boolean isMarkJoinSlotNotNull) { + private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr subquery, + LogicalPlan childPlan, + Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, + CascadesContext ctx, Optional<Expression> conjunct, boolean isProject, + boolean singleSubquery, boolean isMarkJoinSlotNotNull) { ctx.setSubqueryExprIsAnalyzed(subquery, true); + Optional<MarkJoinSlotReference> markJoinSlot = subqueryToMarkJoinSlot.get(subquery); boolean needAddScalarSubqueryOutputToProjects = isConjunctContainsScalarSubqueryOutput( subquery, conjunct, isProject, singleSubquery); + boolean useNewSubquery = false; Review Comment: ```suggestion boolean needRuntimeAssertCount = false; ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java: ########## @@ -47,6 +53,33 @@ public ScalarSubquery(LogicalPlan subquery, List<Slot> correlateSlots, Optional< super(Objects.requireNonNull(subquery, "subquery can not be null"), Objects.requireNonNull(correlateSlots, "correlateSlots can not be null"), typeCoercionExpr); + hasTopLevelScalarAgg = findTopLevelScalarAgg(subquery); + } + + // check if the query has top level scalar agg + // if the correlated subquery doesn't have top level scalar agg + // we need create one in subquery unnesting step + private static boolean findTopLevelScalarAgg(Plan plan) { Review Comment: add fe ut for this static function ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) { return false; } - private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, - Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, - CascadesContext ctx, Optional<Expression> conjunct, - boolean isProject, boolean singleSubquery, boolean isMarkJoinSlotNotNull) { + private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr subquery, + LogicalPlan childPlan, + Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, + CascadesContext ctx, Optional<Expression> conjunct, boolean isProject, + boolean singleSubquery, boolean isMarkJoinSlotNotNull) { ctx.setSubqueryExprIsAnalyzed(subquery, true); + Optional<MarkJoinSlotReference> markJoinSlot = subqueryToMarkJoinSlot.get(subquery); boolean needAddScalarSubqueryOutputToProjects = isConjunctContainsScalarSubqueryOutput( subquery, conjunct, isProject, singleSubquery); + boolean useNewSubquery = false; + NamedExpression oldSubqueryOutput = subquery.getQueryPlan().getOutput().get(0); + Slot countSlot = null; + Slot anyValueSlot = null; + Optional<Expression> newConjunct = conjunct; + if (needAddScalarSubqueryOutputToProjects && subquery instanceof ScalarSubquery + && !subquery.getCorrelateSlots().isEmpty() + && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) { + // if scalar subquery doesn't have top level scalar agg we will create one, for example + // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1; + // the original output of the correlate subquery is t2.c1, after adding a scalar agg, it will be + // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = t1.c2) from t1; + Alias countAlias = new Alias(new Count()); + Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput)); + LogicalAggregate<Plan> aggregate = new LogicalAggregate<>(ImmutableList.of(), + ImmutableList.of(countAlias, anyValueAlias), subquery.getQueryPlan()); + countSlot = countAlias.toSlot(); + anyValueSlot = anyValueAlias.toSlot(); + subquery = subquery.withSubquery(aggregate); + if (conjunct.isPresent()) { + Map<Expression, Expression> replaceMap = new HashMap<>(); + replaceMap.put(oldSubqueryOutput, anyValueSlot); + newConjunct = Optional.of(ExpressionUtils.replace(conjunct.get(), replaceMap)); + } + useNewSubquery = true; + } LogicalApply newApply = new LogicalApply( subquery.getCorrelateSlots(), subquery, Optional.empty(), - subqueryToMarkJoinSlot.get(subquery), + markJoinSlot, needAddScalarSubqueryOutputToProjects, isProject, isMarkJoinSlotNotNull, childPlan, subquery.getQueryPlan()); - List<NamedExpression> projects = ImmutableList.<NamedExpression>builder() - // left child - .addAll(childPlan.getOutput()) - // markJoinSlotReference - .addAll(subqueryToMarkJoinSlot.get(subquery).isPresent() - ? ImmutableList.of(subqueryToMarkJoinSlot.get(subquery).get()) : ImmutableList.of()) - // scalarSubquery output - .addAll(needAddScalarSubqueryOutputToProjects - ? ImmutableList.of(subquery.getQueryPlan().getOutput().get(0)) : ImmutableList.of()) - .build(); - - return new LogicalProject(projects, newApply); + ImmutableList.Builder<NamedExpression> projects = ImmutableList.builder(); Review Comment: ```suggestion ImmutableList.Builder<NamedExpression> projects = ImmutableList.builderWithExpectedSize(childPlan.getOutput().size() + 3); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) { return false; } - private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, - Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, - CascadesContext ctx, Optional<Expression> conjunct, - boolean isProject, boolean singleSubquery, boolean isMarkJoinSlotNotNull) { + private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr subquery, + LogicalPlan childPlan, + Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, + CascadesContext ctx, Optional<Expression> conjunct, boolean isProject, + boolean singleSubquery, boolean isMarkJoinSlotNotNull) { ctx.setSubqueryExprIsAnalyzed(subquery, true); + Optional<MarkJoinSlotReference> markJoinSlot = subqueryToMarkJoinSlot.get(subquery); boolean needAddScalarSubqueryOutputToProjects = isConjunctContainsScalarSubqueryOutput( subquery, conjunct, isProject, singleSubquery); + boolean useNewSubquery = false; + NamedExpression oldSubqueryOutput = subquery.getQueryPlan().getOutput().get(0); + Slot countSlot = null; + Slot anyValueSlot = null; + Optional<Expression> newConjunct = conjunct; + if (needAddScalarSubqueryOutputToProjects && subquery instanceof ScalarSubquery + && !subquery.getCorrelateSlots().isEmpty() + && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) { + // if scalar subquery doesn't have top level scalar agg we will create one, for example + // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1; + // the original output of the correlate subquery is t2.c1, after adding a scalar agg, it will be + // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = t1.c2) from t1; + Alias countAlias = new Alias(new Count()); + Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput)); + LogicalAggregate<Plan> aggregate = new LogicalAggregate<>(ImmutableList.of(), + ImmutableList.of(countAlias, anyValueAlias), subquery.getQueryPlan()); + countSlot = countAlias.toSlot(); + anyValueSlot = anyValueAlias.toSlot(); + subquery = subquery.withSubquery(aggregate); + if (conjunct.isPresent()) { + Map<Expression, Expression> replaceMap = new HashMap<>(); + replaceMap.put(oldSubqueryOutput, anyValueSlot); + newConjunct = Optional.of(ExpressionUtils.replace(conjunct.get(), replaceMap)); + } + useNewSubquery = true; + } LogicalApply newApply = new LogicalApply( subquery.getCorrelateSlots(), subquery, Optional.empty(), - subqueryToMarkJoinSlot.get(subquery), + markJoinSlot, needAddScalarSubqueryOutputToProjects, isProject, isMarkJoinSlotNotNull, childPlan, subquery.getQueryPlan()); - List<NamedExpression> projects = ImmutableList.<NamedExpression>builder() - // left child - .addAll(childPlan.getOutput()) - // markJoinSlotReference - .addAll(subqueryToMarkJoinSlot.get(subquery).isPresent() - ? ImmutableList.of(subqueryToMarkJoinSlot.get(subquery).get()) : ImmutableList.of()) - // scalarSubquery output - .addAll(needAddScalarSubqueryOutputToProjects - ? ImmutableList.of(subquery.getQueryPlan().getOutput().get(0)) : ImmutableList.of()) - .build(); - - return new LogicalProject(projects, newApply); + ImmutableList.Builder<NamedExpression> projects = ImmutableList.builder(); + // left child + projects.addAll(childPlan.getOutput()); + // markJoinSlotReference + projects.addAll(markJoinSlot.isPresent() ? ImmutableList.of(markJoinSlot.get()) : ImmutableList.of()); Review Comment: ```suggestion markJoinSlot.map(projects::add); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -383,32 +395,68 @@ private static boolean hasTopLevelScalarAgg(Plan plan) { return false; } - private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, - Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, - CascadesContext ctx, Optional<Expression> conjunct, - boolean isProject, boolean singleSubquery, boolean isMarkJoinSlotNotNull) { + private Pair<LogicalPlan, Optional<Expression>> addApply(SubqueryExpr subquery, + LogicalPlan childPlan, + Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot, + CascadesContext ctx, Optional<Expression> conjunct, boolean isProject, + boolean singleSubquery, boolean isMarkJoinSlotNotNull) { ctx.setSubqueryExprIsAnalyzed(subquery, true); + Optional<MarkJoinSlotReference> markJoinSlot = subqueryToMarkJoinSlot.get(subquery); boolean needAddScalarSubqueryOutputToProjects = isConjunctContainsScalarSubqueryOutput( subquery, conjunct, isProject, singleSubquery); + boolean useNewSubquery = false; + NamedExpression oldSubqueryOutput = subquery.getQueryPlan().getOutput().get(0); + Slot countSlot = null; + Slot anyValueSlot = null; + Optional<Expression> newConjunct = conjunct; + if (needAddScalarSubqueryOutputToProjects && subquery instanceof ScalarSubquery + && !subquery.getCorrelateSlots().isEmpty() + && !((ScalarSubquery) subquery).hasTopLevelScalarAgg()) { + // if scalar subquery doesn't have top level scalar agg we will create one, for example + // select (select t2.c1 from t2 where t2.c2 = t1.c2) from t1; + // the original output of the correlate subquery is t2.c1, after adding a scalar agg, it will be + // select (select count(*), any_value(t2.c1) from t2 where t2.c2 = t1.c2) from t1; + Alias countAlias = new Alias(new Count()); + Alias anyValueAlias = new Alias(new AnyValue(oldSubqueryOutput)); + LogicalAggregate<Plan> aggregate = new LogicalAggregate<>(ImmutableList.of(), + ImmutableList.of(countAlias, anyValueAlias), subquery.getQueryPlan()); Review Comment: group by will be add later? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org