This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6f41abada0d [feature](Nereids) support qualify stmt (#40048) 6f41abada0d is described below commit 6f41abada0d10d7566523c6861ef8afccae4c337 Author: shee <13843187+qz...@users.noreply.github.com> AuthorDate: Thu Oct 31 16:26:22 2024 +0800 [feature](Nereids) support qualify stmt (#40048) like bigquery https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#qualify_clause Examples SELECT item, RANK() OVER (PARTITION BY category ORDER BY purchases DESC) as rank FROM Produce WHERE Produce.category = 'vegetable' QUALIFY rank <= 3 /*---------+------* | item | rank | +---------+------+ | kale | 1 | | lettuce | 2 | | cabbage | 3 | *---------+------*/ You don't have to include a window function in the SELECT list to use QUALIFY. The following query returns the most popular vegetables SELECT item FROM Produce WHERE Produce.category = 'vegetable' QUALIFY RANK() OVER (PARTITION BY category ORDER BY purchases DESC) <= 3 /*---------* | item | +---------+ | kale | | lettuce | | cabbage | *---------*/ Co-authored-by: garenshi <garen...@tencent.com> --- .../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 1 + .../antlr4/org/apache/doris/nereids/DorisParser.g4 | 6 + .../doris/nereids/jobs/executor/Analyzer.java | 4 + .../doris/nereids/parser/LogicalPlanBuilder.java | 21 +- .../org/apache/doris/nereids/rules/RuleType.java | 8 + .../nereids/rules/analysis/BindExpression.java | 195 +++++++++++++++ .../nereids/rules/analysis/FillUpMissingSlots.java | 7 +- .../rules/analysis/FillUpQualifyMissingSlot.java | 272 +++++++++++++++++++++ .../nereids/rules/analysis/QualifyToFilter.java | 34 +++ .../apache/doris/nereids/trees/plans/PlanType.java | 1 + .../trees/plans/logical/LogicalQualify.java | 154 ++++++++++++ .../nereids/trees/plans/visitor/PlanVisitor.java | 5 + .../doris/nereids/parser/NereidsParserTest.java | 44 ++++ .../rules/analysis/FillUpMissingSlotsTest.java | 122 +++++++++ .../window_functions/test_qualify_query.out | 122 +++++++++ .../window_functions/test_qualify_query.groovy | 120 +++++++++ 16 files changed, 1109 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 47a45b67aa7..8ce8d033108 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -429,6 +429,7 @@ QUANTILE_STATE: 'QUANTILE_STATE'; QUANTILE_UNION: 'QUANTILE_UNION'; QUERY: 'QUERY'; QUOTA: 'QUOTA'; +QUALIFY: 'QUALIFY'; RANDOM: 'RANDOM'; RANGE: 'RANGE'; READ: 'READ'; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 80da53a51bd..acd139c010e 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1117,6 +1117,7 @@ querySpecification whereClause? aggClause? havingClause? + qualifyClause? {doris_legacy_SQL_syntax}? queryOrganization #regularQuerySpecification ; @@ -1203,6 +1204,10 @@ havingClause : HAVING booleanExpression ; +qualifyClause + : QUALIFY booleanExpression + ; + selectHint: hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HINT_END; hintStatement @@ -2018,6 +2023,7 @@ nonReserved | QUANTILE_UNION | QUERY | QUOTA + | QUALIFY | RANDOM | RECENT | RECOVER diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 6f6c022117c..894d4264201 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -35,6 +35,7 @@ import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; +import org.apache.doris.nereids.rules.analysis.FillUpQualifyMissingSlot; import org.apache.doris.nereids.rules.analysis.HavingToFilter; import org.apache.doris.nereids.rules.analysis.LeadingJoin; import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; @@ -43,6 +44,7 @@ import org.apache.doris.nereids.rules.analysis.NormalizeRepeat; import org.apache.doris.nereids.rules.analysis.OneRowRelationExtractAggregate; import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate; import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate; +import org.apache.doris.nereids.rules.analysis.QualifyToFilter; import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput; import org.apache.doris.nereids.rules.analysis.SubqueryToApply; import org.apache.doris.nereids.rules.analysis.VariableToLiteral; @@ -125,6 +127,7 @@ public class Analyzer extends AbstractBatchJobExecutor { topDown(new BindSink()), bottomUp(new CheckAfterBind()), bottomUp(new AddInitMaterializationHook()), + topDown(new FillUpQualifyMissingSlot()), bottomUp( new ProjectToGlobalAggregate(), // this rule check's the logicalProject node's isDistinct property @@ -165,6 +168,7 @@ public class Analyzer extends AbstractBatchJobExecutor { topDown(new SimplifyAggGroupBy()), topDown(new NormalizeAggregate()), topDown(new HavingToFilter()), + topDown(new QualifyToFilter()), bottomUp(new SemiJoinCommute()), bottomUp( new CollectSubQueryAlias(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index a15966ed55f..b5e6d928d6c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -154,6 +154,7 @@ import org.apache.doris.nereids.DorisParser.PropertyItemListContext; import org.apache.doris.nereids.DorisParser.PropertyKeyContext; import org.apache.doris.nereids.DorisParser.PropertyValueContext; import org.apache.doris.nereids.DorisParser.QualifiedNameContext; +import org.apache.doris.nereids.DorisParser.QualifyClauseContext; import org.apache.doris.nereids.DorisParser.QueryContext; import org.apache.doris.nereids.DorisParser.QueryOrganizationContext; import org.apache.doris.nereids.DorisParser.QueryTermContext; @@ -484,6 +485,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; @@ -1459,7 +1461,8 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { selectCtx, Optional.ofNullable(ctx.whereClause()), Optional.ofNullable(ctx.aggClause()), - Optional.ofNullable(ctx.havingClause())); + Optional.ofNullable(ctx.havingClause()), + Optional.ofNullable(ctx.qualifyClause())); selectPlan = withQueryOrganization(selectPlan, ctx.queryOrganization()); if ((selectHintMap == null) || selectHintMap.isEmpty()) { return selectPlan; @@ -3162,24 +3165,32 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { SelectClauseContext selectClause, Optional<WhereClauseContext> whereClause, Optional<AggClauseContext> aggClause, - Optional<HavingClauseContext> havingClause) { + Optional<HavingClauseContext> havingClause, + Optional<QualifyClauseContext> qualifyClause) { return ParserUtils.withOrigin(ctx, () -> { // from -> where -> group by -> having -> select LogicalPlan filter = withFilter(inputRelation, whereClause); SelectColumnClauseContext selectColumnCtx = selectClause.selectColumnClause(); LogicalPlan aggregate = withAggregate(filter, selectColumnCtx, aggClause); boolean isDistinct = (selectClause.DISTINCT() != null); + LogicalPlan selectPlan; if (!(aggregate instanceof Aggregate) && havingClause.isPresent()) { // create a project node for pattern match of ProjectToGlobalAggregate rule // then ProjectToGlobalAggregate rule can insert agg node as LogicalHaving node's child List<NamedExpression> projects = getNamedExpressions(selectColumnCtx.namedExpressionSeq()); LogicalPlan project = new LogicalProject<>(projects, isDistinct, aggregate); - return new LogicalHaving<>(ExpressionUtils.extractConjunctionToSet( + selectPlan = new LogicalHaving<>(ExpressionUtils.extractConjunctionToSet( getExpression((havingClause.get().booleanExpression()))), project); } else { LogicalPlan having = withHaving(aggregate, havingClause); - return withProjection(having, selectColumnCtx, aggClause, isDistinct); + selectPlan = withProjection(having, selectColumnCtx, aggClause, isDistinct); + } + // support qualify clause + if (qualifyClause.isPresent()) { + Expression qualifyExpr = getExpression(qualifyClause.get().booleanExpression()); + selectPlan = new LogicalQualify<>(Sets.newHashSet(qualifyExpr), selectPlan); } + return selectPlan; }); } @@ -3387,7 +3398,7 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { } protected LogicalPlan withProjection(LogicalPlan input, SelectColumnClauseContext selectCtx, - Optional<AggClauseContext> aggCtx, boolean isDistinct) { + Optional<AggClauseContext> aggCtx, boolean isDistinct) { return ParserUtils.withOrigin(selectCtx, () -> { if (aggCtx.isPresent()) { if (isDistinct) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 08fbc9aafec..dbf96ef2f1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -43,6 +43,9 @@ public enum RuleType { BINDING_USING_JOIN_SLOT(RuleTypeClass.REWRITE), BINDING_JOIN_SLOT(RuleTypeClass.REWRITE), BINDING_FILTER_SLOT(RuleTypeClass.REWRITE), + BINDING_QUALIFY_PROJECT_SLOT(RuleTypeClass.REWRITE), + BINDING_QUALIFY_AGGREGATE_SLOT(RuleTypeClass.REWRITE), + BINDING_QUALIFY_HAVING_SLOT(RuleTypeClass.REWRITE), BINDING_AGGREGATE_SLOT(RuleTypeClass.REWRITE), BINDING_REPEAT_SLOT(RuleTypeClass.REWRITE), BINDING_HAVING_SLOT(RuleTypeClass.REWRITE), @@ -67,11 +70,16 @@ public enum RuleType { FILL_UP_SORT_HAVING_PROJECT(RuleTypeClass.REWRITE), FILL_UP_SORT_HAVING_AGGREGATE(RuleTypeClass.REWRITE), FILL_UP_SORT_PROJECT(RuleTypeClass.REWRITE), + FILL_UP_QUALIFY_PROJECT(RuleTypeClass.REWRITE), + FILL_UP_QUALIFY_AGGREGATE(RuleTypeClass.REWRITE), + FILL_UP_QUALIFY_HAVING_AGGREGATE(RuleTypeClass.REWRITE), + FILL_UP_QUALIFY_HAVING_PROJECT(RuleTypeClass.REWRITE), RESOLVE_PROJECT_ALIAS(RuleTypeClass.REWRITE), RESOLVE_AGGREGATE_ALIAS(RuleTypeClass.REWRITE), PROJECT_TO_GLOBAL_AGGREGATE(RuleTypeClass.REWRITE), HAVING_TO_FILTER(RuleTypeClass.REWRITE), + QUALIFY_TO_FILTER(RuleTypeClass.REWRITE), ONE_ROW_RELATION_EXTRACT_AGGREGATE(RuleTypeClass.REWRITE), PROJECT_WITH_DISTINCT_TO_AGGREGATE(RuleTypeClass.REWRITE), AVG_DISTINCT_TO_SUM_DIV_COUNT(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java index bebe2702cc5..9cdda51e476 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java @@ -80,6 +80,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalResultSink; import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; @@ -184,6 +185,15 @@ public class BindExpression implements AnalysisRuleFactory { RuleType.BINDING_HAVING_SLOT.build( logicalHaving(any().whenNot(Aggregate.class::isInstance)).thenApply(this::bindHaving) ), + RuleType.BINDING_QUALIFY_PROJECT_SLOT.build( + logicalQualify(logicalProject()).thenApply(this::bindQualifyProject) + ), + RuleType.BINDING_QUALIFY_AGGREGATE_SLOT.build( + logicalQualify(aggregate()).thenApply(this::bindQualifyAggregate) + ), + RuleType.BINDING_QUALIFY_HAVING_SLOT.build( + logicalQualify(logicalHaving()).thenApply(this::bindQualifyHaving) + ), RuleType.BINDING_INLINE_TABLE_SLOT.build( logicalInlineTable().thenApply(this::bindInlineTable) ), @@ -693,6 +703,191 @@ public class BindExpression implements AnalysisRuleFactory { return new LogicalFilter<>(boundConjuncts.build(), filter.child()); } + /** + * there a dup table sales + * CREATE TABLE sales ( + * year INT, + * country STRING, + * product STRING, + * profit INT + * ) + * DISTRIBUTED BY HASH(`year`) + * PROPERTIES ( + * "replication_num" = "1" + * ); + * 1.qualify -> project + * for example : + * select year + 1 as year from sales qualify row_number() over (order by year, country) = 1; + * We are binding the year field of table sales. Instead of renaming year + * ----------------------------------------------------------------------------------------------------------------- + * 2.qualify -> project(distinct) + * for example: + * select distinct year + 1, country from sales qualify row_number() over (order by year + 1) > 1; + * We are binding the year field of table sales. + * ----------------------------------------------------------------------------------------------------------------- + * 3.qualify -> project(distinct) -> agg + * for example: + * select distinct year + 1 as year from sales group by year qualify row_number() over (order by year) = 1; + * We are binding the year field of group by output. Instead of renaming year + * ----------------------------------------------------------------------------------------------------------------- + * 4.qualify -> project(distinct) -> having -> agg + * for example: + * select distinct year,country from sales group by year,country having year > 2000 + * qualify row_number() over (order by year + 1) > 1; + * We are binding the year field of group output. + *----------------------------------------------------------------------------------------------------------------- + * Note: For the query without agg, we first bind slot from the child of the project. + * If it cannot be bound in the child, then bind slot from the project. + * If query with agg, we bind slot from the group by first. if not then bind slot from the group output + * or not bind slot from the agg child output finally. + */ + private Plan bindQualifyProject(MatchingContext<LogicalQualify<LogicalProject<Plan>>> ctx) { + LogicalQualify<LogicalProject<Plan>> qualify = ctx.root; + CascadesContext cascadesContext = ctx.cascadesContext; + LogicalProject<Plan> project = qualify.child(); + ImmutableSet.Builder<Expression> boundConjuncts = ImmutableSet.builderWithExpectedSize( + qualify.getConjuncts().size()); + if (project.child() instanceof Aggregate) { + Aggregate<Plan> aggregate = (Aggregate<Plan>) project.child(); + bindQualifyByAggregate(aggregate, cascadesContext, qualify, boundConjuncts); + } else if (project.child() instanceof LogicalHaving) { + LogicalHaving<Plan> having = (LogicalHaving<Plan>) project.child(); + if (having.child() instanceof Aggregate) { + Aggregate<Plan> aggregate = (Aggregate<Plan>) having.child(); + bindQualifyByAggregate(aggregate, cascadesContext, qualify, boundConjuncts); + } else { + throw new AnalysisException("unknown query structure"); + } + } else { + bindQualifyByProject(project, cascadesContext, qualify, boundConjuncts); + } + return new LogicalQualify<>(boundConjuncts.build(), qualify.child()); + } + + /** + * 1.qualify -> having -> agg + * for example: + * select country, sum(profit) as total, row_number() over (order by country) as rk from sales where year >= 2000 + * group by country having sum(profit) > 100 qualify rk = 1 + * We are binding the country field from group by. + * ----------------------------------------------------------------------------------------------------------------- + * 2.qualify -> having -> project + * for example: + * select year, country, profit, row_number() over (partition by year, country order by profit desc) as rk from + * (select * from sales) a where year >= 2000 having profit > 200 qualify rk = 1 order by profit,country limit 3 + * We are binding year/country/profit from sales + * ----------------------------------------------------------------------------------------------------------------- + * 3.qualify -> having -> project(distinct) + * for example: + * select distinct year + 1 as year from sales qualify row_number() over (order by year) = 1; + * we are binding year from sales. Instead of renaming year + */ + private Plan bindQualifyHaving(MatchingContext<LogicalQualify<LogicalHaving<Plan>>> ctx) { + LogicalQualify<LogicalHaving<Plan>> qualify = ctx.root; + CascadesContext cascadesContext = ctx.cascadesContext; + LogicalHaving<Plan> having = qualify.child(); + ImmutableSet.Builder<Expression> boundConjuncts = ImmutableSet.builderWithExpectedSize( + qualify.getConjuncts().size()); + if (having.child() instanceof Aggregate) { + bindQualifyByAggregate((Aggregate<? extends Plan>) having.child(), cascadesContext, qualify, + boundConjuncts); + } else { + bindQualifyByProject((LogicalProject<? extends Plan>) having.child(), cascadesContext, qualify, + boundConjuncts); + } + return new LogicalQualify<>(boundConjuncts.build(), qualify.child()); + } + + /** + * qualify -> agg + * for example: + * select country, sum(profit) as total, row_number() over (order by country) as rk from sales qualify rk > 1 + * we are binding the country field from group by. + */ + private Plan bindQualifyAggregate(MatchingContext<LogicalQualify<Aggregate<Plan>>> ctx) { + LogicalQualify<Aggregate<Plan>> qualify = ctx.root; + CascadesContext cascadesContext = ctx.cascadesContext; + Aggregate<Plan> aggregate = qualify.child(); + ImmutableSet.Builder<Expression> boundConjuncts = ImmutableSet.builderWithExpectedSize( + qualify.getConjuncts().size()); + bindQualifyByAggregate(aggregate, cascadesContext, qualify, boundConjuncts); + return new LogicalQualify<>(boundConjuncts.build(), qualify.child()); + } + + private void bindQualifyByProject(LogicalProject<? extends Plan> project, CascadesContext cascadesContext, + LogicalQualify<? extends Plan> qualify, + ImmutableSet.Builder<Expression> boundConjuncts) { + Supplier<Scope> defaultScope = Suppliers.memoize(() -> + toScope(cascadesContext, PlanUtils.fastGetChildrenOutputs(project.children())) + ); + Scope backupScope = toScope(cascadesContext, project.getOutput()); + + SimpleExprAnalyzer analyzer = buildCustomSlotBinderAnalyzer( + qualify, cascadesContext, defaultScope.get(), true, true, + (self, unboundSlot) -> { + List<Slot> slots = self.bindSlotByScope(unboundSlot, defaultScope.get()); + if (!slots.isEmpty()) { + return slots; + } + return self.bindSlotByScope(unboundSlot, backupScope); + }); + + for (Expression conjunct : qualify.getConjuncts()) { + conjunct = analyzer.analyze(conjunct); + conjunct = TypeCoercionUtils.castIfNotSameType(conjunct, BooleanType.INSTANCE); + boundConjuncts.add(conjunct); + } + } + + private void bindQualifyByAggregate(Aggregate<? extends Plan> aggregate, CascadesContext cascadesContext, + LogicalQualify<? extends Plan> qualify, + ImmutableSet.Builder<Expression> boundConjuncts) { + Supplier<CustomSlotBinderAnalyzer> bindByAggChild = Suppliers.memoize(() -> { + Scope aggChildOutputScope + = toScope(cascadesContext, PlanUtils.fastGetChildrenOutputs(aggregate.children())); + return (analyzer, unboundSlot) -> analyzer.bindSlotByScope(unboundSlot, aggChildOutputScope); + }); + Scope aggOutputScope = toScope(cascadesContext, aggregate.getOutput()); + Supplier<CustomSlotBinderAnalyzer> bindByGroupByThenAggOutputThenAggChildOutput = Suppliers.memoize(() -> { + List<Expression> groupByExprs = aggregate.getGroupByExpressions(); + ImmutableList.Builder<Slot> groupBySlots = ImmutableList.builderWithExpectedSize(groupByExprs.size()); + for (Expression groupBy : groupByExprs) { + if (groupBy instanceof Slot) { + groupBySlots.add((Slot) groupBy); + } + } + Scope groupBySlotsScope = toScope(cascadesContext, groupBySlots.build()); + + return (analyzer, unboundSlot) -> { + List<Slot> boundInGroupBy = analyzer.bindSlotByScope(unboundSlot, groupBySlotsScope); + if (!boundInGroupBy.isEmpty()) { + return ImmutableList.of(boundInGroupBy.get(0)); + } + List<Slot> boundInAggOutput = analyzer.bindSlotByScope(unboundSlot, aggOutputScope); + if (!boundInAggOutput.isEmpty()) { + return ImmutableList.of(boundInAggOutput.get(0)); + } + List<? extends Expression> expressions = bindByAggChild.get().bindSlot(analyzer, unboundSlot); + return expressions.isEmpty() ? expressions : ImmutableList.of(expressions.get(0)); + }; + }); + + ExpressionAnalyzer qualifyAnalyzer = new ExpressionAnalyzer(qualify, aggOutputScope, cascadesContext, + true, true) { + @Override + protected List<? extends Expression> bindSlotByThisScope(UnboundSlot unboundSlot) { + return bindByGroupByThenAggOutputThenAggChildOutput.get().bindSlot(this, unboundSlot); + } + }; + + ExpressionRewriteContext rewriteContext = new ExpressionRewriteContext(cascadesContext); + for (Expression expression : qualify.getConjuncts()) { + Expression boundConjunct = qualifyAnalyzer.analyze(expression, rewriteContext); + boundConjunct = TypeCoercionUtils.castIfNotSameType(boundConjunct, BooleanType.INSTANCE); + boundConjuncts.add(boundConjunct); + } + } + private List<Slot> exceptStarSlots(Set<NamedExpression> boundExcepts, BoundStar boundStar) { List<Slot> slots = boundStar.getSlots(); if (!boundExcepts.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java index c55ed5957ba..c392b85317d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java @@ -28,6 +28,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.Aggregate; @@ -245,7 +246,7 @@ public class FillUpMissingSlots implements AnalysisRuleFactory { having.withChildren(new LogicalProject<>(projects, project.child()))); } }) - ) + ) ); } @@ -316,6 +317,8 @@ public class FillUpMissingSlots implements AnalysisRuleFactory { + expression.toSql() + "."); } generateAliasForNewOutputSlots(expression); + } else if (expression instanceof WindowExpression) { + generateAliasForNewOutputSlots(expression); } else { // Try to resolve the children. for (Expression child : expression.children()) { @@ -387,7 +390,7 @@ public class FillUpMissingSlots implements AnalysisRuleFactory { Plan apply(Resolver resolver, Aggregate<?> aggregate); } - private Plan createPlan(Resolver resolver, Aggregate<? extends Plan> aggregate, PlanGenerator planGenerator) { + protected Plan createPlan(Resolver resolver, Aggregate<? extends Plan> aggregate, PlanGenerator planGenerator) { Aggregate<? extends Plan> newAggregate; if (resolver.getNewOutputSlots().isEmpty()) { newAggregate = aggregate; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpQualifyMissingSlot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpQualifyMissingSlot.java new file mode 100644 index 00000000000..bb998832547 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpQualifyMissingSlot.java @@ -0,0 +1,272 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.WindowExpression; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.Aggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; +import org.apache.doris.nereids.util.ExpressionUtils; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; + +/** + * We don't fill the missing slots in FillUpMissingSlots. + * Because for distinct queries, + * for example: + * select distinct year,country from sales having year > 2000 qualify row_number() over (order by year + 1) > 1; + * It would be converted into the form of agg. + * before logical plan: + * qualify + * | + * project(distinct) + * | + * scan + * apply ProjectWithDistinctToAggregate rule + * after logical plan: + * qualify + * | + * agg + * | + * scan + * if fill the missing slots in FillUpMissingSlots(after ProjectWithDistinctToAggregate). qualify could hardly be + * pushed under the agg of distinct. + * But apply FillUpQualifyMissingSlot rule before ProjectWithDistinctToAggregate + * logical plan: + * project(distinct) + * | + * qualify + * | + * project + * | + * scan + * and then apply ProjectWithDistinctToAggregate rule + * logical plan: + * agg + * | + * qualify + * | + * project + * | + * scan + * So it is easy to handle. + */ +public class FillUpQualifyMissingSlot extends FillUpMissingSlots { + @Override + public List<Rule> buildRules() { + return ImmutableList.of( + /* + qualify -> project + qualify -> project(distinct) + qualify -> project(distinct) -> agg + qualify -> project(distinct) -> having -> agg + */ + RuleType.FILL_UP_QUALIFY_PROJECT.build( + logicalQualify(logicalProject()) + .then(qualify -> { + checkWindow(qualify); + LogicalProject<Plan> project = qualify.child(); + return createPlan(project, qualify.getConjuncts(), (newConjuncts, projects) -> { + LogicalProject<Plan> bottomProject = new LogicalProject<>(projects, project.child()); + LogicalQualify<Plan> logicalQualify = new LogicalQualify<>(newConjuncts, bottomProject); + ImmutableList<NamedExpression> copyOutput = ImmutableList.copyOf(project.getOutput()); + return new LogicalProject<>(copyOutput, project.isDistinct(), logicalQualify); + }); + }) + ), + /* + qualify -> agg + */ + RuleType.FILL_UP_QUALIFY_AGGREGATE.build( + logicalQualify(aggregate()).then(qualify -> { + checkWindow(qualify); + Aggregate<Plan> agg = qualify.child(); + Resolver resolver = new Resolver(agg); + qualify.getConjuncts().forEach(resolver::resolve); + return createPlan(resolver, agg, (r, a) -> { + Set<Expression> newConjuncts = ExpressionUtils.replace( + qualify.getConjuncts(), r.getSubstitution()); + boolean notChanged = newConjuncts.equals(qualify.getConjuncts()); + if (notChanged && a.equals(agg)) { + return null; + } + return notChanged ? qualify.withChildren(a) : new LogicalQualify<>(newConjuncts, a); + }); + }) + ), + /* + qualify -> having -> agg + */ + RuleType.FILL_UP_QUALIFY_HAVING_AGGREGATE.build( + logicalQualify(logicalHaving(aggregate())).then(qualify -> { + checkWindow(qualify); + LogicalHaving<Aggregate<Plan>> having = qualify.child(); + Aggregate<Plan> agg = qualify.child().child(); + Resolver resolver = new Resolver(agg); + qualify.getConjuncts().forEach(resolver::resolve); + return createPlan(resolver, agg, (r, a) -> { + Set<Expression> newConjuncts = ExpressionUtils.replace( + qualify.getConjuncts(), r.getSubstitution()); + boolean notChanged = newConjuncts.equals(qualify.getConjuncts()); + if (notChanged && a.equals(agg)) { + return null; + } + return notChanged ? qualify.withChildren(having.withChildren(a)) : + new LogicalQualify<>(newConjuncts, having.withChildren(a)); + }); + }) + ), + /* + qualify -> having -> project + qualify -> having -> project(distinct) + */ + RuleType.FILL_UP_QUALIFY_HAVING_PROJECT.build( + logicalQualify(logicalHaving(logicalProject())).then(qualify -> { + checkWindow(qualify); + LogicalHaving<LogicalProject<Plan>> having = qualify.child(); + LogicalProject<Plan> project = qualify.child().child(); + return createPlan(project, qualify.getConjuncts(), (newConjuncts, projects) -> { + ImmutableList<NamedExpression> copyOutput = ImmutableList.copyOf(project.getOutput()); + if (project.isDistinct()) { + Set<Slot> missingSlots = having.getExpressions().stream() + .map(Expression::getInputSlots) + .flatMap(Set::stream) + .filter(s -> !projects.contains(s)) + .collect(Collectors.toSet()); + List<NamedExpression> output = ImmutableList.<NamedExpression>builder() + .addAll(projects).addAll(missingSlots).build(); + LogicalQualify<LogicalProject<Plan>> logicalQualify = + new LogicalQualify<>(newConjuncts, new LogicalProject<>(output, project.child())); + return having.withChildren(project.withProjects(copyOutput).withChildren(logicalQualify)); + } else { + return new LogicalProject<>(copyOutput, new LogicalQualify<>(newConjuncts, + having.withChildren(project.withProjects(projects)))); + } + }); + }) + ) + ); + } + + interface PlanGenerator { + Plan apply(Set<Expression> newConjuncts, List<NamedExpression> projects); + } + + private Plan createPlan(LogicalProject<Plan> project, Set<Expression> conjuncts, PlanGenerator planGenerator) { + Set<Slot> projectOutputSet = project.getOutputSet(); + List<NamedExpression> newOutputSlots = Lists.newArrayList(); + Set<Expression> newConjuncts = new HashSet<>(); + for (Expression conjunct : conjuncts) { + conjunct = conjunct.accept(new DefaultExpressionRewriter<List<NamedExpression>>() { + @Override + public Expression visitWindow(WindowExpression window, List<NamedExpression> context) { + Alias alias = new Alias(window); + context.add(alias); + return alias.toSlot(); + } + }, newOutputSlots); + newConjuncts.add(conjunct); + } + Set<Slot> notExistedInProject = conjuncts.stream() + .map(Expression::getInputSlots) + .flatMap(Set::stream) + .filter(s -> !projectOutputSet.contains(s)) + .collect(Collectors.toSet()); + + newOutputSlots.addAll(notExistedInProject); + if (newOutputSlots.isEmpty()) { + return null; + } + List<NamedExpression> projects = ImmutableList.<NamedExpression>builder() + .addAll(project.getProjects()) + .addAll(newOutputSlots).build(); + + return planGenerator.apply(newConjuncts, projects); + } + + private void checkWindow(LogicalQualify<? extends Plan> qualify) throws AnalysisException { + Set<SlotReference> inputSlots = new HashSet<>(); + AtomicBoolean hasWindow = new AtomicBoolean(false); + for (Expression conjunct : qualify.getConjuncts()) { + conjunct.accept(new DefaultExpressionVisitor<Void, Set<SlotReference>>() { + @Override + public Void visitWindow(WindowExpression windowExpression, Set<SlotReference> context) { + hasWindow.set(true); + return null; + } + + @Override + public Void visitSlotReference(SlotReference slotReference, Set<SlotReference> context) { + context.add(slotReference); + return null; + } + + }, inputSlots); + } + if (hasWindow.get()) { + return; + } + qualify.accept(new DefaultPlanVisitor<Void, Void>() { + private void findWindow(List<NamedExpression> namedExpressions) { + for (NamedExpression slot : namedExpressions) { + if (slot instanceof Alias && slot.child(0) instanceof WindowExpression) { + if (inputSlots.contains(slot.toSlot())) { + hasWindow.set(true); + } + } + } + } + + @Override + public Void visitLogicalProject(LogicalProject<? extends Plan> project, Void context) { + findWindow(project.getProjects()); + return visit(project, context); + } + + @Override + public Void visitLogicalAggregate(LogicalAggregate<? extends Plan> aggregate, Void context) { + findWindow(aggregate.getOutputExpressions()); + return visit(aggregate, context); + } + }, null); + if (!hasWindow.get()) { + throw new AnalysisException("qualify only used for window expression"); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/QualifyToFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/QualifyToFilter.java new file mode 100644 index 00000000000..8bd933010ac --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/QualifyToFilter.java @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; + +/** + * qualify to filter. + */ +public class QualifyToFilter extends OneAnalysisRuleFactory { + @Override + public Rule build() { + return logicalQualify() + .then(qualify -> new LogicalFilter<>(qualify.getConjuncts(), qualify.child())) + .toRule(RuleType.QUALIFY_TO_FILTER); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 988ca381304..b87dfaf08ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -75,6 +75,7 @@ public enum PlanType { LOGICAL_MULTI_JOIN, LOGICAL_PARTITION_TOP_N, LOGICAL_PROJECT, + LOGICAL_QUALIFY, LOGICAL_REPEAT, LOGICAL_SELECT_HINT, LOGICAL_SUBQUERY_ALIAS, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalQualify.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalQualify.java new file mode 100644 index 00000000000..ced6730dfb5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalQualify.java @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.DataTrait.Builder; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.algebra.Filter; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; + +/** + * Logical qualify plan. + */ +public class LogicalQualify<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> implements Filter { + + private final Set<Expression> conjuncts; + + public LogicalQualify(Set<Expression> conjuncts, CHILD_TYPE child) { + this(conjuncts, Optional.empty(), Optional.empty(), child); + } + + private LogicalQualify(Set<Expression> conjuncts, Optional<GroupExpression> groupExpression, + Optional<LogicalProperties> logicalProperties, CHILD_TYPE child) { + super(PlanType.LOGICAL_QUALIFY, groupExpression, logicalProperties, child); + this.conjuncts = ImmutableSet.copyOf(Objects.requireNonNull(conjuncts, "conjuncts can not be null")); + } + + @Override + public Set<Expression> getConjuncts() { + return conjuncts; + } + + @Override + public List<Slot> computeOutput() { + return child().getOutput(); + } + + @Override + public Plan withGroupExpression(Optional<GroupExpression> groupExpression) { + return new LogicalQualify<>(conjuncts, groupExpression, Optional.of(getLogicalProperties()), child()); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression, + Optional<LogicalProperties> logicalProperties, List<Plan> children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalQualify<>(conjuncts, groupExpression, logicalProperties, children.get(0)); + } + + public LogicalQualify<Plan> withConjuncts(Set<Expression> conjuncts) { + return new LogicalQualify<>(conjuncts, Optional.empty(), Optional.of(getLogicalProperties()), child()); + } + + @Override + public String toString() { + return Utils.toSqlString("LogicalQualify[" + id.asInt() + "]", + "predicates", getPredicate() + ); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LogicalQualify that = (LogicalQualify) o; + return conjuncts.equals(that.conjuncts); + } + + @Override + public int hashCode() { + return Objects.hash(conjuncts); + } + + @Override + public <R, C> R accept(PlanVisitor<R, C> visitor, C context) { + return visitor.visitLogicalQualify(this, context); + } + + @Override + public List<? extends Expression> getExpressions() { + return ImmutableList.copyOf(conjuncts); + } + + @Override + public LogicalQualify<Plan> withChildren(List<Plan> children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalQualify<>(conjuncts, children.get(0)); + } + + @Override + public void computeUnique(Builder builder) { + builder.addUniqueSlot(child(0).getLogicalProperties().getTrait()); + } + + @Override + public void computeUniform(Builder builder) { + for (Expression e : getConjuncts()) { + Set<Slot> uniformSlots = ExpressionUtils.extractUniformSlot(e); + for (Slot slot : uniformSlots) { + builder.addUniformSlot(slot); + } + } + builder.addUniformSlot(child(0).getLogicalProperties().getTrait()); + } + + @Override + public void computeEqualSet(Builder builder) { + builder.addEqualSet(child().getLogicalProperties().getTrait()); + for (Expression expression : getConjuncts()) { + Optional<Pair<Slot, Slot>> equalSlot = ExpressionUtils.extractEqualSlot(expression); + equalSlot.ifPresent(slotSlotPair -> builder.addEqualPair(slotSlotPair.first, slotSlotPair.second)); + } + } + + @Override + public void computeFd(Builder builder) { + builder.addFuncDepsDG(child().getLogicalProperties().getTrait()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index 8db1407220c..396c6e4f265 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -39,6 +39,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.logical.LogicalSelectHint; @@ -169,6 +170,10 @@ public abstract class PlanVisitor<R, C> implements CommandVisitor<R, C>, Relatio return visit(filter, context); } + public R visitLogicalQualify(LogicalQualify<? extends Plan> filter, C context) { + return visit(filter, context); + } + public R visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, C context) { return visit(generate, context); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java index c8a5364b710..ff9e81f2bf3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java @@ -47,6 +47,7 @@ import org.apache.doris.nereids.types.DecimalV3Type; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.Set; @@ -660,4 +661,47 @@ public class NereidsParserTest extends ParserTestBase { String sql = "create role a comment 'create user'"; nereidsParser.parseSingle(sql); } + + @Test + public void testQualify() { + NereidsParser nereidsParser = new NereidsParser(); + + List<String> sqls = new ArrayList<>(); + sqls.add("select year, country, profit, row_number() over (order by year) as rk from (select * from sales) a where year >= 2000 qualify rk > 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 qualify row_number() over (order by year) > 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 qualify rank() over (order by year) > 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 qualify dense_rank() over (order by year) > 1"); + + sqls.add("select country, sum(profit) as total, row_number() over (order by country) as rk from sales where year >= 2000 group by country having sum(profit) > 100 qualify rk = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country having sum(profit) > 100 qualify row_number() over (order by country) = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country having sum(profit) > 100 qualify rank() over (order by country) = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country having sum(profit) > 100 qualify dense_rank() over (order by country) = 1"); + + sqls.add("select country, sum(profit) as total, row_number() over (order by country) as rk from sales where year >= 2000 group by country qualify rk = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country qualify row_number() over (order by country) = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country qualify rank() over (order by country) = 1"); + sqls.add("select country, sum(profit) as total from sales where year >= 2000 group by country qualify dense_rank() over (order by country) = 1"); + + sqls.add("select year, country, product, profit, row_number() over (partition by year, country order by profit desc) as rk from sales where year >= 2000 qualify rk = 1 order by profit"); + sqls.add("select year, country, product, profit from sales where year >= 2000 qualify row_number() over (partition by year, country order by profit desc) = 1 order by profit"); + sqls.add("select year, country, product, profit from sales where year >= 2000 qualify rank() over (partition by year, country order by profit desc) = 1 order by profit"); + sqls.add("select year, country, product, profit from sales where year >= 2000 qualify dense_rank() over (partition by year, country order by profit desc) = 1 order by profit"); + + sqls.add("select year, country, profit, row_number() over (partition by year, country order by profit desc) as rk from (select * from sales) a where year >= 2000 having profit > 200 qualify rk = 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 having profit > 200 qualify row_number() over (partition by year, country order by profit desc) = 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 having profit > 200 qualify rank() over (partition by year, country order by profit desc) = 1"); + sqls.add("select year, country, profit from (select * from sales) a where year >= 2000 having profit > 200 qualify dense_rank() over (partition by year, country order by profit desc) = 1"); + + sqls.add("select distinct year, row_number() over (order by year) as rk from sales group by year qualify rk = 1"); + sqls.add("select distinct year from sales group by year qualify row_number() over (order by year) = 1"); + sqls.add("select distinct year from sales group by year qualify rank() over (order by year) = 1"); + sqls.add("select distinct year from sales group by year qualify dense_rank() over (order by year) = 1"); + + sqls.add("select year, country, profit from (select year, country, profit from (select year, country, profit, row_number() over (partition by year, country order by profit desc) as rk from (select * from sales) a where year >= 2000 having profit > 200) t where rk = 1) a where year >= 2000 qualify row_number() over (order by profit) = 1"); + sqls.add("select year, country, profit from (select year, country, profit from (select * from sales) a where year >= 2000 having profit > 200 qualify row_number() over (partition by year, country order by profit desc) = 1) a qualify row_number() over (order by profit) = 1"); + + for (String sql : sqls) { + nereidsParser.parseSingle(sql); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java index 02f3caffa80..31b135e7fbb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java @@ -72,6 +72,16 @@ public class FillUpMissingSlotsTest extends AnalyzeCheckTestBase implements Memo + "DISTRIBUTED BY HASH (pk)\n" + "PROPERTIES(\n" + " 'replication_num' = '1'\n" + + ");", + "CREATE TABLE sales (\n" + + " year INT,\n" + + " country STRING,\n" + + " product STRING,\n" + + " profit INT\n" + + ") \n" + + "DISTRIBUTED BY HASH(`year`)\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + ");" ); } @@ -597,4 +607,116 @@ public class FillUpMissingSlotsTest extends AnalyzeCheckTestBase implements Memo PlanChecker.from(connectContext).analyze(sql) .applyBottomUp(new CheckAfterRewrite()); } + + @Test + void testQualify() { + connectContext.getSessionVariable().setDisableNereidsRules("ELIMINATE_AGG_ON_EMPTYRELATION"); + String sql = "select year + 1, country from sales where year >= 2000 qualify row_number() over (order by profit) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalProject( + logicalFilter( + logicalWindow( + logicalEmptyRelation()) + ).when(filter -> filter.toString().contains("predicates=(row_number() OVER(ORDER BY profit asc null first)#5 > 1)")) + ) + ) + ); + + sql = "select year + 1, country, row_number() over (order by year) as rk from sales where year >= 2000 qualify rk > profit"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalProject( + logicalFilter( + logicalWindow( + logicalEmptyRelation()) + ).when(filter -> filter.toString().contains("predicates=(rk#5 > cast(profit#3 as BIGINT))")) + ) + ) + ); + + sql = "select year + 1, country from sales where year >= 2000 group by year,country qualify rank() over (order by year) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalProject( + logicalFilter( + logicalWindow( + logicalProject( + logicalAggregate(logicalEmptyRelation()))) + ).when(filter -> filter.toString().contains("predicates=(rank() OVER(ORDER BY year asc null first)#5 > 1)")) + ) + ) + ); + + sql = "select year + 1, country, sum(profit) as total from sales where year >= 2000 group by year,country having sum(profit) > 100 qualify row_number() over (order by year) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalProject( + logicalFilter( + logicalWindow( + logicalProject( + logicalFilter( + logicalAggregate(logicalEmptyRelation()) + ).when(filter -> filter.toString().contains("predicates=(total#5 > 100)")) + ) + ) + ).when(filter -> filter.toString().contains("predicates=(row_number() OVER(ORDER BY year asc null first)#6 > 1)")) + ) + ) + ); + + sql = "select distinct year + 1,country from sales qualify row_number() over (order by profit + 1) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalAggregate( + logicalProject( + logicalFilter( + logicalWindow( + logicalEmptyRelation()) + ).when(filter -> filter.toString().contains("predicates=(row_number() OVER(ORDER BY (profit + 1) asc null first)#5 > 1)")) + ) + ) + ) + ); + + sql = "select distinct year + 1 as year,country from sales group by year, country qualify row_number() over (order by year) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalAggregate( + logicalProject( + logicalFilter( + logicalWindow( + logicalProject(logicalAggregate(logicalEmptyRelation()))) + ).when(filter -> filter.toString().contains("predicates=(row_number() OVER(ORDER BY year asc null first)#5 > 1)")) + ) + ) + ) + ); + + sql = "select distinct year,country,rank() over (order by year) from sales having sum(profit) > 100 qualify row_number() over (order by year) > 1"; + PlanChecker.from(connectContext).analyze(sql).rewrite().matches( + logicalResultSink( + logicalProject( + logicalFilter( + logicalAggregate( + logicalProject( + logicalFilter( + logicalWindow( + logicalEmptyRelation()) + ).when(filter -> filter.toString().contains("predicates=(row_number() OVER(ORDER BY year asc null first)#5 > 1)")) + ) + ) + ).when(filter -> filter.toString().contains("predicates=(sum(profit)#6 > 100)")) + ) + ) + ); + + ExceptionChecker.expectThrowsWithMsg( + AnalysisException.class, + "qualify only used for window expression", + () -> PlanChecker.from(connectContext).analyze( + "select year + 1, country from sales where year >= 2000 qualify year > 1" + ) + ); + } } diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_qualify_query.out b/regression-test/data/query_p0/sql_functions/window_functions/test_qualify_query.out new file mode 100644 index 00000000000..29dac133106 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_qualify_query.out @@ -0,0 +1,122 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_1 -- +2001 Finland +2001 Finland +2001 India +2001 India +2001 India +2001 USA +2002 Finland +2002 USA +2002 USA +2002 USA +2002 USA +2002 USA + +-- !select_4 -- +2000 USA 1502 1 + +-- !select_5 -- +2000 Finland Computer 1501 1 +2000 India Computer 1201 1 +2000 USA Computer 1502 1 + +-- !select_6 -- +2000 Finland 1501 1 +2000 India 1201 1 +2000 USA 1502 1 +2001 USA 1503 1 + +-- !select_7 -- +2000 India 1201 + +-- !select_8 -- +2000 India 1201 + +-- !select_9 -- +Finland Phone 11 1 + +-- !select_10 -- +Finland Phone 11 + +-- !select_12 -- +2001 India 1201 1 +2001 Finland 1501 1 +2001 usa 1502 1 +2002 usa 1503 1 + +-- !select_13 -- +2001 India 1201 +2001 Finland 1501 +2001 usa 1502 +2002 usa 1503 + +-- !select_14 -- +2000 USA Computer 1502 +2001 USA Computer 1503 + +-- !select_15 -- +2000 USA Computer 1502 1 +2001 USA Computer 1503 1 + +-- !select_16 -- + +-- !select_17 -- + +-- !select_18 -- + +-- !select_19 -- + +-- !select_20 -- + +-- !select_21 -- +2001 Finland 10 1 +2001 USA 50 2 +2000 India 75 3 + +-- !select_22 -- +2001 Finland 10 1 + +-- !select_23 -- + +-- !select_24 -- +2001 Finland 1601 + +-- !select_25 -- +2000 Finland 1501 +2000 India 1201 +2000 USA 1502 +2001 Finland 10 +2001 USA 1503 + +-- !select_26 -- +2002 Finland + +-- !select_27 -- +2002 Finland 2 + +-- !select_28 -- +2002 USA 3006 + +-- !select_29 -- +2002 Finland + +-- !select_30 -- +2001 Finland 1 + +-- !select_31 -- +2001 Finland + +-- !select_32 -- + +-- !select_33 -- +2001 USA 3 + +-- !select_34 -- +2000 India 1 + +-- !select_35 -- +2001 Finland + +-- !select_36 -- +2001 Finland 6 diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_qualify_query.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_qualify_query.groovy new file mode 100644 index 00000000000..04ed0cd7415 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_qualify_query.groovy @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("test_qualify_query") { + sql "create database if not exists qualify_test" + sql "use qualify_test" + sql "DROP TABLE IF EXISTS sales" + sql """ + CREATE TABLE sales ( + year INT, + country STRING, + product STRING, + profit INT + ) + DISTRIBUTED BY HASH(`year`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + sql """ + INSERT INTO sales VALUES + (2000,'Finland','Computer',1501), + (2000,'Finland','Phone',100), + (2001,'Finland','Phone',10), + (2000,'India','Calculator',75), + (2000,'India','Calculator',76), + (2000,'India','Computer',1201), + (2000,'USA','Calculator',77), + (2000,'USA','Computer',1502), + (2001,'USA','Calculator',50), + (2001,'USA','Computer',1503), + (2001,'USA','Computer',1202), + (2001,'USA','TV',150), + (2001,'USA','TV',101); + """ + + qt_select_1 "select year + 1 as year, country from sales where year >= 2000 qualify row_number() over (order by year) > 1 order by year,country;" + + qt_select_4 "select year, country, profit, row_number() over (order by year) as rk from (select * from sales) a where year = 2000 qualify rk = 1;" + + qt_select_5 "select year, country, product, profit, row_number() over (partition by year, country order by profit desc) as rk from sales where year = 2000 qualify rk = 1 order by year, country, product, profit;" + + qt_select_6 "select year, country, profit, row_number() over (partition by year, country order by profit desc) as rk from (select * from sales) a where year >= 2000 having profit > 200 qualify rk = 1 order by year, country;" + + qt_select_7 "select year, country, profit from (select year, country, profit from (select year, country, profit, row_number() over (partition by year, country order by profit desc) as rk from (select * from sales) a where year >= 2000 having profit > 200) t where rk = 1) a where year >= 2000 qualify row_number() over (order by profit) = 1;" + + qt_select_8 "select year, country, profit from (select year, country, profit from (select * from sales) a where year >= 2000 having profit > 200 qualify row_number() over (partition by year, country order by profit desc) = 1) a qualify row_number() over (order by profit) = 1;" + + qt_select_9 "select * except(year) replace(profit+1 as profit), row_number() over (order by profit) as rk from sales where year >= 2000 qualify rk = 1;" + + qt_select_10 "select * except(year) replace(profit+1 as profit) from sales where year >= 2000 qualify row_number() over (order by year) > profit;" + + qt_select_12 "select year + 1, if(country = 'USA', 'usa' , country), case when profit < 200 then 200 else profit end as new_profit, row_number() over (partition by year, country order by profit desc) as rk from (select * from sales) a where year >= 2000 having profit > 200 qualify rk = 1 order by new_profit;" + + qt_select_13 "select year + 1, if(country = 'USA', 'usa' , country), case when profit < 200 then 200 else profit end as new_profit from (select * from sales) a where year >= 2000 having profit > 200 qualify row_number() over (partition by year, country order by profit desc) = 1 order by new_profit;" + + qt_select_14 "select * from sales where year >= 2000 qualify row_number() over (partition by year order by profit desc, country) = 1 order by country,profit;" + + qt_select_15 "select *,row_number() over (partition by year order by profit desc, country) as rk from sales where year >= 2000 qualify rk = 1 order by country,profit;" + + qt_select_16 "select * from sales where year >= 2000 qualify row_number() over (partition by year order by if(profit > 200, profit, profit+200) desc, country) = profit order by country;" + + qt_select_17 "select * from sales where year >= 2000 qualify row_number() over (partition by year order by case when profit > 200 then profit else profit+200 end desc, country) = profit order by country;" + + qt_select_18 "select distinct x.year, x.country, x.product from sales x left join sales y on x.year = y.year left join sales z on x.year = z.year where x.year >= 2000 qualify row_number() over (partition by x.year order by x.profit desc) = x.profit order by year;" + + qt_select_19 "select year, country, profit, row_number() over (order by profit) as rk1, row_number() over (order by country) as rk2 from (select * from sales) a where year >= 2000 qualify rk1 = 1 and rk2 > 2;" + + qt_select_20 "select year, country, profit, row_number() over (order by year) as rk from (select * from sales) a where year >= 2000 qualify rk + 1 > 1 * 100;" + + qt_select_21 "select year, country, profit, row_number() over (order by profit) as rk from (select * from sales) a where year >= 2000 qualify rk in (1,2,3);" + + qt_select_22 "select year, country, profit, row_number() over (order by profit) as rk from (select * from sales) a where year >= 2000 qualify rk = (select 1);" + + qt_select_23 "select year, country, profit, row_number() over (order by year) as rk from (select * from sales) a where year >= 2000 qualify rk = (select max(year) from sales);" + + qt_select_24 "select year+1, country, sum(profit) as total from sales where year >= 2000 and country = 'Finland' group by year,country having sum(profit) > 100 qualify row_number() over (order by year) = 1;" + + qt_select_25 "select year, country, profit from (select * from sales) a where year >= 2000 qualify row_number() over (partition by year, country order by profit desc) = 1 order by year, country, profit;" + + qt_select_26 "select year + 1, country from sales where year >= 2000 and country = 'Finland' group by year,country qualify row_number() over (order by year) > 1;" + + qt_select_27 "select year + 1, country, row_number() over (order by year) as rk from sales where year >= 2000 and country = 'Finland' group by year,country qualify rk > 1;" + + qt_select_28 "select year + 1, country, sum(profit) as total from sales where year >= 2000 group by year,country having sum(profit) > 1700 qualify row_number() over (order by year) = 1;" + + qt_select_29 "select distinct year + 1,country from sales qualify row_number() over (order by profit + 1) = 1;" + + qt_select_30 "select distinct year,country, row_number() over (order by profit + 1) as rk from sales qualify row_number() over (order by profit + 1) = 1;" + + qt_select_31 "select distinct year + 1 as year,country from sales where country = 'Finland' group by year, country qualify row_number() over (order by year) = 1;" + + qt_select_32 "select distinct year,country from sales having sum(profit) > 100 qualify row_number() over (order by year) > 100;" + + qt_select_33 "select distinct year,country,rank() over (order by year) from sales where country = 'USA' having sum(profit) > 100 qualify row_number() over (order by year) > 1;" + + qt_select_34 "select distinct year,country,rank() over (order by year) from sales where country = 'India' having sum(profit) > 100;" + + qt_select_35 "select year + 1, country from sales having profit >= 100 qualify row_number() over (order by profit) = 6;" + + qt_select_36 "select year + 1, country, row_number() over (order by profit) rk from sales having profit >= 100 qualify rk = 6;" +} + + + + + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org