This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 75451ab0ed [feature](nereids) Add stats derive framework for new optimizer (#11179) 75451ab0ed is described below commit 75451ab0ed933db4da300de72c458f6ce46b20bb Author: Kikyou1997 <33112463+kikyou1...@users.noreply.github.com> AuthorDate: Thu Jul 28 14:38:19 2022 +0800 [feature](nereids) Add stats derive framework for new optimizer (#11179) Implement a visitor to derive stats for each operator which would be used for CBO. --- .../org/apache/doris/nereids/NereidsPlanner.java | 9 + .../glue/translator/PhysicalPlanTranslator.java | 21 +- .../nereids/jobs/cascades/DeriveStatsJob.java | 5 +- .../java/org/apache/doris/nereids/memo/Group.java | 7 + .../apache/doris/nereids/memo/GroupExpression.java | 5 + .../nereids/stats/FilterSelectivityCalculator.java | 110 ++++++++++ .../apache/doris/nereids/stats/JoinEstimation.java | 160 ++++++++++++++ .../doris/nereids/stats/StatsCalculator.java | 235 ++++++++++++++++++++ .../nereids/trees/expressions/SlotReference.java | 1 - .../doris/nereids/trees/plans/AbstractPlan.java | 39 +--- .../doris/nereids/trees/plans/Aggregate.java | 33 +++ .../apache/doris/nereids/trees/plans/Filter.java | 27 +++ .../doris/nereids/trees/plans/GroupPlan.java | 28 --- .../apache/doris/nereids/trees/plans/JoinType.java | 12 + .../org/apache/doris/nereids/trees/plans/Plan.java | 5 +- .../apache/doris/nereids/trees/plans/Project.java | 29 +++ .../org/apache/doris/nereids/trees/plans/Scan.java | 38 ++++ .../trees/plans/logical/LogicalAggregate.java | 3 +- .../nereids/trees/plans/logical/LogicalFilter.java | 3 +- .../trees/plans/logical/LogicalOlapScan.java | 2 +- .../trees/plans/logical/LogicalProject.java | 3 +- .../trees/plans/logical/LogicalRelation.java | 3 +- .../trees/plans/physical/PhysicalAggregate.java | 3 +- .../trees/plans/physical/PhysicalFilter.java | 3 +- .../trees/plans/physical/PhysicalProject.java | 3 +- .../trees/plans/physical/PhysicalRelation.java | 3 +- .../nereids/trees/plans/visitor/PlanVisitor.java | 4 +- .../java/org/apache/doris/nereids/util/Utils.java | 44 ++++ .../java/org/apache/doris/qe/SessionVariable.java | 13 ++ .../apache/doris/statistics/BaseStatsDerive.java | 4 +- .../org/apache/doris/statistics/ColumnStats.java | 26 +++ .../org/apache/doris/statistics/Statistics.java | 5 + .../apache/doris/statistics/StatsDeriveResult.java | 68 +++++- .../org/apache/doris/statistics/TableStats.java | 11 +- .../nereids/jobs/cascades/DeriveStatsJobTest.java | 127 +++++++++++ .../apache/doris/nereids/plan/TestPlanOutput.java | 5 + .../doris/nereids/stats/StatsCalculatorTest.java | 244 +++++++++++++++++++++ 37 files changed, 1229 insertions(+), 112 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index c5427c943c..625ec84097 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -29,6 +29,7 @@ import org.apache.doris.nereids.jobs.batch.DisassembleRulesJob; import org.apache.doris.nereids.jobs.batch.JoinReorderRulesJob; import org.apache.doris.nereids.jobs.batch.OptimizeRulesJob; import org.apache.doris.nereids.jobs.batch.PredicatePushDownRulesJob; +import org.apache.doris.nereids.jobs.cascades.DeriveStatsJob; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.PhysicalProperties; @@ -105,6 +106,10 @@ public class NereidsPlanner extends Planner { .setJobContext(outputProperties); rewrite(); + // TODO: remove this condition, when stats collector is fully developed. + if (ConnectContext.get().getSessionVariable().isEnableNereidsCBO()) { + deriveStats(); + } optimize(); // Get plan directly. Just for SSB. @@ -120,6 +125,10 @@ public class NereidsPlanner extends Planner { new DisassembleRulesJob(plannerContext).execute(); } + private void deriveStats() { + new DeriveStatsJob(getRoot().getLogicalExpression(), plannerContext.getCurrentJobContext()).execute(); + } + /** * Cascades style optimize: perform equivalent logical plan exploration and physical implementation enumeration, * try to find best plan under the guidance of statistic information and cost model. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 47f9748263..b55ff0277e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -51,6 +51,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; import org.apache.doris.planner.AggregationNode; import org.apache.doris.planner.DataPartition; import org.apache.doris.planner.ExchangeNode; @@ -244,7 +245,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } catch (Exception e) { throw new AnalysisException(e.getMessage()); } - exec(olapScanNode::init); + Utils.execWithUncheckedException(olapScanNode::init); olapScanNode.addConjuncts(execConjunctsList); context.addScanNode(olapScanNode); // Create PlanFragment @@ -491,22 +492,4 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla return fragment; } - /** - * Helper function to eliminate unnecessary checked exception caught requirement from the main logic of translator. - * - * @param f function which would invoke the logic of - * stale code from old optimizer that could throw - * a checked exception - */ - public void exec(FuncWrapper f) { - try { - f.exec(); - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } - } - - private interface FuncWrapper { - void exec() throws Exception; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java index 49ef3ff228..389c54c347 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJob.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.jobs.JobType; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.stats.StatsCalculator; /** * Job to derive stats for {@link GroupExpression} in {@link org.apache.doris.nereids.memo.Memo}. @@ -64,8 +65,8 @@ public class DeriveStatsJob extends Job { } } } else { - // TODO: derive stat here - groupExpression.setStatDerived(true); + StatsCalculator statsCalculator = new StatsCalculator(groupExpression); + statsCalculator.estimate(); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java index 4a85acb0b2..8e093f0506 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java @@ -69,6 +69,13 @@ public class Group { groupExpression.setOwnerGroup(this); } + /** + * For unit test only. + */ + public Group() { + groupId = null; + } + public GroupId getGroupId() { return groupId; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java index 6a7a6c78ea..27dbb0257d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.statistics.StatsDeriveResult; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -174,4 +175,8 @@ public class GroupExpression { public int hashCode() { return Objects.hash(children, plan); } + + public StatsDeriveResult getCopyOfChildStats(int idx) { + return child(idx).getStatistics().copy(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java new file mode 100644 index 0000000000..f4daf9f8b8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.stats; + +import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; +import org.apache.doris.nereids.trees.expressions.CompoundPredicate; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.GreaterThan; +import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; +import org.apache.doris.nereids.trees.expressions.LessThan; +import org.apache.doris.nereids.trees.expressions.Literal; +import org.apache.doris.nereids.trees.expressions.Or; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; +import org.apache.doris.statistics.ColumnStats; + +import com.google.common.base.Preconditions; + +import java.util.Map; + +/** + * Calculate selectivity of the filter. + */ +public class FilterSelectivityCalculator extends DefaultExpressionVisitor<Double, Void> { + + private static double DEFAULT_SELECTIVITY = 0.1; + + private final Map<Slot, ColumnStats> slotRefToStats; + + public FilterSelectivityCalculator(Map<Slot, ColumnStats> slotRefToStats) { + Preconditions.checkState(slotRefToStats != null); + this.slotRefToStats = slotRefToStats; + } + + /** + * Do estimate. + */ + public double estimate(Expression expression) { + // For a comparison predicate, only when it's left side is a slot and right side is a literal, we would + // consider is a valid predicate. + if (expression instanceof ComparisonPredicate + && !(expression.child(0) instanceof SlotReference + && expression.child(1) instanceof Literal)) { + return 1.0; + } + return expression.accept(this, null); + } + + @Override + public Double visitCompoundPredicate(CompoundPredicate compoundPredicate, Void context) { + Expression leftExpr = compoundPredicate.child(0); + Expression rightExpr = compoundPredicate.child(1); + double leftSel = 1; + double rightSel = 1; + leftSel = estimate(leftExpr); + rightSel = estimate(rightExpr); + return compoundPredicate instanceof Or ? leftSel + rightSel - leftSel * rightSel : leftSel * rightSel; + } + + @Override + public Double visitComparisonPredicate(ComparisonPredicate cp, Void context) { + return super.visitComparisonPredicate(cp, context); + } + + // TODO: If right value greater than the max value or less than min value in column stats, return 0.0 . + @Override + public Double visitEqualTo(EqualTo equalTo, Void context) { + SlotReference left = (SlotReference) equalTo.left(); + ColumnStats columnStats = slotRefToStats.get(left); + if (columnStats == null) { + return DEFAULT_SELECTIVITY; + } + long ndv = columnStats.getNdv(); + return ndv < 0 ? DEFAULT_SELECTIVITY : ndv == 0 ? 0 : 1.0 / columnStats.getNdv(); + } + + // TODO: Should consider the distribution of data. + @Override + public Double visitGreaterThan(GreaterThan greaterThan, Void context) { + return DEFAULT_SELECTIVITY; + } + + @Override + public Double visitGreaterThanEqual(GreaterThanEqual greaterThanEqual, Void context) { + return DEFAULT_SELECTIVITY; + } + + @Override + public Double visitLessThan(LessThan lessThan, Void context) { + return DEFAULT_SELECTIVITY; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java new file mode 100644 index 0000000000..c623956505 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.stats; + +import org.apache.doris.common.CheckedMath; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.statistics.ColumnStats; +import org.apache.doris.statistics.StatsDeriveResult; + +import com.google.common.base.Preconditions; + +import java.util.List; +import java.util.Map; + +/** + * Estimate hash join stats. + * TODO: Update other props in the ColumnStats properly. + */ +public class JoinEstimation { + + /** + * Do estimate. + */ + public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, + Expression eqCondition, JoinType joinType) { + StatsDeriveResult statsDeriveResult = new StatsDeriveResult(leftStats); + statsDeriveResult.merge(rightStats); + List<Expression> eqConjunctList = ExpressionUtils.extractConjunctive(eqCondition); + long rowCount = -1; + if (joinType.isSemiOrAntiJoin()) { + rowCount = getSemiJoinRowCount(leftStats, rightStats, eqConjunctList, joinType); + } else if (joinType.isInnerJoin() || joinType.isOuterJoin()) { + rowCount = getJoinRowCount(leftStats, rightStats, eqConjunctList, joinType); + } else if (joinType.isCrossJoin()) { + rowCount = CheckedMath.checkedMultiply(leftStats.getRowCount(), + rightStats.getRowCount()); + } else { + throw new RuntimeException("joinType is not supported"); + } + statsDeriveResult.setRowCount(rowCount); + return statsDeriveResult; + } + + // TODO: If the condition of Join Plan could any expression in addition to EqualTo type, + // we should handle that properly. + private static long getSemiJoinRowCount(StatsDeriveResult leftStats, StatsDeriveResult rightStats, + List<Expression> eqConjunctList, JoinType joinType) { + long rowCount; + if (JoinType.RIGHT_SEMI_JOIN.equals(joinType) || JoinType.RIGHT_ANTI_JOIN.equals(joinType)) { + if (rightStats.getRowCount() == -1) { + return -1; + } + rowCount = rightStats.getRowCount(); + } else { + if (leftStats.getRowCount() == -1) { + return -1; + } + rowCount = leftStats.getRowCount(); + } + Map<Slot, ColumnStats> leftSlotToColStats = leftStats.getSlotToColumnStats(); + Map<Slot, ColumnStats> rightSlotToColStats = rightStats.getSlotToColumnStats(); + double minSelectivity = 1.0; + for (Expression eqJoinPredicate : eqConjunctList) { + long lhsNdv = leftSlotToColStats.get(eqJoinPredicate.child(0)).getNdv(); + lhsNdv = Math.min(lhsNdv, leftStats.getRowCount()); + long rhsNdv = rightSlotToColStats.get(eqJoinPredicate.child(1)).getNdv(); + rhsNdv = Math.min(rhsNdv, rightStats.getRowCount()); + // Skip conjuncts with unknown NDV on either side. + if (lhsNdv == -1 || rhsNdv == -1) { + continue; + } + // TODO: Do we need NULL_AWARE_LEFT_ANTI_JOIN type as stale optimizer? + double selectivity = 1.0; + switch (joinType) { + case LEFT_SEMI_JOIN: { + selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (lhsNdv); + break; + } + case RIGHT_SEMI_JOIN: { + selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (rhsNdv); + break; + } + case LEFT_ANTI_JOIN: + selectivity = (double) (lhsNdv > rhsNdv ? (lhsNdv - rhsNdv) : lhsNdv) / (double) lhsNdv; + break; + case RIGHT_ANTI_JOIN: { + selectivity = (double) (rhsNdv > lhsNdv ? (rhsNdv - lhsNdv) : rhsNdv) / (double) rhsNdv; + break; + } + default: + throw new RuntimeException("joinType is not supported"); + } + minSelectivity = Math.min(minSelectivity, selectivity); + } + Preconditions.checkState(rowCount != -1); + return Math.round(rowCount * minSelectivity); + } + + private static long getJoinRowCount(StatsDeriveResult leftStats, StatsDeriveResult rightStats, + List<Expression> eqConjunctList, JoinType joinType) { + long lhsCard = leftStats.getRowCount(); + long rhsCard = rightStats.getRowCount(); + Map<Slot, ColumnStats> leftSlotToColumnStats = leftStats.getSlotToColumnStats(); + Map<Slot, ColumnStats> rightSlotToColumnStats = rightStats.getSlotToColumnStats(); + if (lhsCard == -1 || rhsCard == -1) { + return lhsCard; + } + long result = -1; + for (Expression eqJoinConjunct : eqConjunctList) { + Expression left = eqJoinConjunct.child(0); + if (!(left instanceof SlotReference)) { + continue; + } + Expression right = eqJoinConjunct.child(1); + if (!(right instanceof SlotReference)) { + continue; + } + SlotReference leftSlot = (SlotReference) left; + ColumnStats leftColStats = leftSlotToColumnStats.get(leftSlot); + if (leftColStats == null) { + continue; + } + SlotReference rightSlot = (SlotReference) right; + ColumnStats rightColStats = rightSlotToColumnStats.get(rightSlot); + if (rightColStats == null) { + continue; + } + double leftSideNdv = leftColStats.getNdv(); + double rightSideNdv = rightColStats.getNdv(); + long tmpNdv = (long) Math.max(1, Math.max(leftSideNdv, rightSideNdv)); + long joinCard = tmpNdv == rhsCard ? lhsCard : CheckedMath.checkedMultiply( + Math.round((lhsCard / Math.max(1, Math.max(leftSideNdv, rightSideNdv)))), rhsCard); + if (result == -1) { + result = joinCard; + } else { + result = Math.min(result, joinCard); + } + } + return result; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java new file mode 100644 index 0000000000..8552e0e01c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -0,0 +1,235 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.stats; + +import org.apache.doris.catalog.Table; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Aggregate; +import org.apache.doris.nereids.trees.plans.Filter; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.Project; +import org.apache.doris.nereids.trees.plans.Scan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalAggregate; +import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribution; +import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter; +import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; +import org.apache.doris.nereids.trees.plans.physical.PhysicalHeapSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; +import org.apache.doris.nereids.util.Utils; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.ColumnStats; +import org.apache.doris.statistics.StatsDeriveResult; +import org.apache.doris.statistics.TableStats; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Used to calculate the stats for each operator + */ +public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> { + + private final GroupExpression groupExpression; + + public StatsCalculator(GroupExpression groupExpression) { + this.groupExpression = groupExpression; + } + + /** + * Do estimate. + */ + public void estimate() { + StatsDeriveResult stats = groupExpression.getPlan().accept(this, null); + groupExpression.getOwnerGroup().setStatistics(stats); + Plan plan = groupExpression.getPlan(); + long limit = plan.getLimit(); + if (limit != -1) { + stats.setRowCount(Math.min(limit, stats.getRowCount())); + } + groupExpression.setStatDerived(true); + } + + @Override + public StatsDeriveResult visitLogicalAggregate(LogicalAggregate<Plan> aggregate, Void context) { + return computeAggregate(aggregate); + } + + @Override + public StatsDeriveResult visitLogicalFilter(LogicalFilter<Plan> filter, Void context) { + return computeFilter(filter); + } + + @Override + public StatsDeriveResult visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) { + olapScan.getExpressions(); + return computeScan(olapScan); + } + + @Override + public StatsDeriveResult visitLogicalProject(LogicalProject<Plan> project, Void context) { + return computeProject(project); + } + + @Override + public StatsDeriveResult visitLogicalSort(LogicalSort<Plan> sort, Void context) { + return groupExpression.getCopyOfChildStats(0); + } + + @Override + public StatsDeriveResult visitLogicalJoin(LogicalJoin<Plan, Plan> join, Void context) { + return JoinEstimation.estimate(groupExpression.getCopyOfChildStats(0), + groupExpression.getCopyOfChildStats(1), + join.getCondition().get(), join.getJoinType()); + } + + @Override + public StatsDeriveResult visitPhysicalAggregate(PhysicalAggregate<Plan> agg, Void context) { + return computeAggregate(agg); + } + + @Override + public StatsDeriveResult visitPhysicalOlapScan(PhysicalOlapScan olapScan, Void context) { + return computeScan(olapScan); + } + + @Override + public StatsDeriveResult visitPhysicalHeapSort(PhysicalHeapSort<Plan> sort, Void context) { + return groupExpression.getCopyOfChildStats(0); + } + + @Override + public StatsDeriveResult visitPhysicalHashJoin(PhysicalHashJoin<Plan, Plan> hashJoin, Void context) { + return JoinEstimation.estimate(groupExpression.getCopyOfChildStats(0), + groupExpression.getCopyOfChildStats(1), + hashJoin.getCondition().get(), hashJoin.getJoinType()); + } + + // TODO: We should subtract those pruned column, and consider the expression transformations in the node. + @Override + public StatsDeriveResult visitPhysicalProject(PhysicalProject<Plan> project, Void context) { + return computeProject(project); + } + + @Override + public StatsDeriveResult visitPhysicalFilter(PhysicalFilter<Plan> filter, Void context) { + return computeFilter(filter); + } + + @Override + public StatsDeriveResult visitPhysicalDistribution(PhysicalDistribution<Plan> distribution, + Void context) { + return groupExpression.getCopyOfChildStats(0); + } + + private StatsDeriveResult computeFilter(Filter filter) { + StatsDeriveResult stats = groupExpression.getCopyOfChildStats(0); + FilterSelectivityCalculator selectivityCalculator = + new FilterSelectivityCalculator(stats.getSlotToColumnStats()); + double selectivity = selectivityCalculator.estimate(filter.getPredicates()); + stats.multiplyDouble(selectivity); + return stats; + } + + // TODO: 1. Subtract the pruned partition + // 2. Consider the influence of runtime filter + // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. + private StatsDeriveResult computeScan(Scan scan) { + Table table = scan.getTable(); + TableStats tableStats = Utils.execWithReturnVal(() -> + ConnectContext.get().getEnv().getStatisticsManager().getStatistics().getTableStats(table.getId()) + ); + Map<Slot, ColumnStats> slotToColumnStats = new HashMap<>(); + Set<SlotReference> slotSet = scan.getOutput().stream().filter(SlotReference.class::isInstance) + .map(s -> (SlotReference) s).collect(Collectors.toSet()); + for (SlotReference slotReference : slotSet) { + String colName = slotReference.getName(); + if (colName == null) { + throw new RuntimeException("Column name of SlotReference shouldn't be null here"); + } + ColumnStats columnStats = tableStats.getColumnStats(colName); + slotToColumnStats.put(slotReference, columnStats); + } + long rowCount = tableStats.getRowCount(); + StatsDeriveResult stats = new StatsDeriveResult(rowCount, + new HashMap<>(), new HashMap<>()); + stats.setSlotToColumnStats(slotToColumnStats); + return stats; + } + + private StatsDeriveResult computeAggregate(Aggregate aggregate) { + List<Expression> groupByExprList = aggregate.getGroupByExpressions(); + StatsDeriveResult childStats = groupExpression.getCopyOfChildStats(0); + Map<Slot, ColumnStats> childSlotColumnStatsMap = childStats.getSlotToColumnStats(); + long resultSetCount = 1; + for (Expression expression : groupByExprList) { + List<SlotReference> slotRefList = expression.collect(SlotReference.class::isInstance); + // TODO: Support more complex group expr. + // For example: + // select max(col1+col3) from t1 group by col1+col3; + if (slotRefList.size() != 1) { + continue; + } + SlotReference slotRef = slotRefList.get(0); + ColumnStats columnStats = childSlotColumnStatsMap.get(slotRef); + resultSetCount *= columnStats.getNdv(); + } + Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>(); + List<NamedExpression> namedExpressionList = aggregate.getOutputExpressions(); + // TODO: 1. Estimate the output unit size by the type of corresponding AggregateFunction + // 2. Handle alias, literal in the output expression list + for (NamedExpression namedExpression : namedExpressionList) { + if (namedExpression instanceof SlotReference) { + slotColumnStatsMap.put((SlotReference) namedExpression, new ColumnStats()); + } + } + StatsDeriveResult statsDeriveResult = new StatsDeriveResult(resultSetCount, slotColumnStatsMap); + // TODO: Update ColumnStats properly, add new mapping from output slot to ColumnStats + return statsDeriveResult; + } + + // TODO: Update data size and min/max value. + private StatsDeriveResult computeProject(Project project) { + List<NamedExpression> namedExpressionList = project.getProjects(); + Set<Slot> slotSet = new HashSet<>(); + for (NamedExpression namedExpression : namedExpressionList) { + List<SlotReference> slotReferenceList = namedExpression.collect(SlotReference.class::isInstance); + slotSet.addAll(slotReferenceList); + } + StatsDeriveResult stat = groupExpression.getCopyOfChildStats(0); + Map<Slot, ColumnStats> slotColumnStatsMap = stat.getSlotToColumnStats(); + slotColumnStatsMap.entrySet().removeIf(entry -> !slotSet.contains(entry.getKey())); + return stat; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java index 12cf3acca8..4aa225c4ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java @@ -110,7 +110,6 @@ public class SlotReference extends Slot { return nullable == that.nullable && dataType.equals(that.dataType) && exprId.equals(that.exprId) - && dataType.equals(that.dataType) && name.equals(that.name) && qualifier.equals(that.qualifier); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java index cefbf5f941..431fb9c281 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java @@ -20,14 +20,11 @@ package org.apache.doris.nereids.trees.plans; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.AbstractTreeNode; -import org.apache.doris.statistics.ExprStats; -import org.apache.doris.statistics.StatisticalType; import org.apache.doris.statistics.StatsDeriveResult; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -38,7 +35,7 @@ import java.util.Optional; public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Plan { protected StatsDeriveResult statsDeriveResult; - protected long limit; + protected long limit = -1; protected final PlanType type; protected final LogicalProperties logicalProperties; @@ -101,36 +98,6 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla } } - @Override - public List<StatsDeriveResult> getChildrenStats() { - return Collections.emptyList(); - } - - @Override - public StatsDeriveResult getStatsDeriveResult() { - return statsDeriveResult; - } - - @Override - public StatisticalType getStatisticalType() { - return null; - } - - @Override - public void setStatsDeriveResult(StatsDeriveResult result) { - this.statsDeriveResult = result; - } - - @Override - public long getLimit() { - return limit; - } - - @Override - public List<? extends ExprStats> getConjuncts() { - return Collections.emptyList(); - } - @Override public boolean equals(Object o) { if (this == o) { @@ -149,4 +116,8 @@ public abstract class AbstractPlan extends AbstractTreeNode<Plan> implements Pla public int hashCode() { return Objects.hash(statsDeriveResult, limit, logicalProperties); } + + public long getLimit() { + return limit; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Aggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Aggregate.java new file mode 100644 index 0000000000..4773403ad5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Aggregate.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans; + +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; + +import java.util.List; + +/** + * Common interface for logical/physical Aggregate. + */ +public interface Aggregate { + + List<Expression> getGroupByExpressions(); + + List<NamedExpression> getOutputExpressions(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Filter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Filter.java new file mode 100644 index 0000000000..1dd37fdd06 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Filter.java @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans; + +import org.apache.doris.nereids.trees.expressions.Expression; + +/** + * Common interface for logical/physical filter. + */ +public interface Filter { + Expression getPredicates(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java index a27a819d1d..3597d97d02 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/GroupPlan.java @@ -24,9 +24,6 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.logical.LogicalLeaf; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; -import org.apache.doris.statistics.ExprStats; -import org.apache.doris.statistics.StatisticalType; -import org.apache.doris.statistics.StatsDeriveResult; import com.google.common.collect.ImmutableList; @@ -70,36 +67,11 @@ public class GroupPlan extends LogicalLeaf { throw new IllegalStateException("GroupPlan can not invoke withChildren()"); } - @Override - public List<StatsDeriveResult> getChildrenStats() { - throw new IllegalStateException("GroupPlan can not invoke getChildrenStats()"); - } - - @Override - public StatsDeriveResult getStatsDeriveResult() { - throw new IllegalStateException("GroupPlan can not invoke getStatsDeriveResult()"); - } - - @Override - public StatisticalType getStatisticalType() { - throw new IllegalStateException("GroupPlan can not invoke getStatisticalType()"); - } - - @Override - public void setStatsDeriveResult(StatsDeriveResult result) { - throw new IllegalStateException("GroupPlan can not invoke setStatsDeriveResult()"); - } - @Override public long getLimit() { throw new IllegalStateException("GroupPlan can not invoke getLimit()"); } - @Override - public List<? extends ExprStats> getConjuncts() { - throw new IllegalStateException("GroupPlan can not invoke getConjuncts()"); - } - @Override public Plan withGroupExpression(Optional<GroupExpression> groupExpression) { throw new IllegalStateException("GroupPlan can not invoke withGroupExpression()"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/JoinType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/JoinType.java index 030482992a..f53edae272 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/JoinType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/JoinType.java @@ -99,4 +99,16 @@ public enum JoinType { public JoinType swap() { return joinSwapMap.get(this); } + + public boolean isSemiOrAntiJoin() { + return this == LEFT_SEMI_JOIN || this == RIGHT_SEMI_JOIN || this == LEFT_ANTI_JOIN || this == RIGHT_ANTI_JOIN; + } + + public boolean isInnerJoin() { + return this == INNER_JOIN; + } + + public boolean isOuterJoin() { + return this == LEFT_OUTER_JOIN || this == RIGHT_OUTER_JOIN || this == FULL_OUTER_JOIN; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java index 029e0f0c0e..fd2e72c02d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.trees.TreeNode; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; -import org.apache.doris.statistics.PlanStats; import java.util.List; import java.util.Optional; @@ -31,7 +30,7 @@ import java.util.Optional; /** * Abstract class for all plan node. */ -public interface Plan extends TreeNode<Plan>, PlanStats { +public interface Plan extends TreeNode<Plan> { PlanType getType(); @@ -62,4 +61,6 @@ public interface Plan extends TreeNode<Plan>, PlanStats { Plan withGroupExpression(Optional<GroupExpression> groupExpression); Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties); + + long getLimit(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Project.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Project.java new file mode 100644 index 0000000000..78c3864a00 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Project.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans; + +import org.apache.doris.nereids.trees.expressions.NamedExpression; + +import java.util.List; + +/** + * Common interface for logical/physical project. + */ +public interface Project { + List<NamedExpression> getProjects(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Scan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Scan.java new file mode 100644 index 0000000000..b5bcbca786 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Scan.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans; + +import org.apache.doris.catalog.Table; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; + +import java.util.Collections; +import java.util.List; + +/** + * Common interface for logical/physical scan. + */ +public interface Scan { + List<Expression> getExpressions(); + + Table getTable(); + + default List<Slot> getOutput() { + return Collections.emptyList(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 0564fd3c19..d60850d044 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.AggPhase; +import org.apache.doris.nereids.trees.plans.Aggregate; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; @@ -48,7 +49,7 @@ import java.util.Optional; * Note: In general, the output of agg is a subset of the group by column plus aggregate column. * In special cases. this relationship does not hold. for example, select k1+1, sum(v1) from table group by k1. */ -public class LogicalAggregate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> { +public class LogicalAggregate<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> implements Aggregate { private final boolean disassembled; private final List<Expression> groupByExpressions; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java index 27d532d998..af58cc2f7c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Filter; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; @@ -35,7 +36,7 @@ import java.util.Optional; /** * Logical filter plan. */ -public class LogicalFilter<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> { +public class LogicalFilter<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> implements Filter { private final Expression predicates; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java index 0d3738b025..94c0249f02 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java @@ -75,7 +75,7 @@ public class LogicalOlapScan extends LogicalRelation { } @Override - public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) { + public LogicalOlapScan withLogicalProperties(Optional<LogicalProperties> logicalProperties) { return new LogicalOlapScan(table, qualifier, Optional.empty(), logicalProperties); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java index 6d9e75bffc..6e2df85cb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.Project; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import com.google.common.base.Preconditions; @@ -37,7 +38,7 @@ import java.util.Optional; /** * Logical project plan. */ -public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> { +public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> implements Project { private final List<NamedExpression> projects; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRelation.java index 31020755be..daa28f6841 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRelation.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.Scan; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import org.apache.doris.nereids.util.Utils; @@ -36,7 +37,7 @@ import java.util.Optional; /** * Logical relation plan. */ -public abstract class LogicalRelation extends LogicalLeaf { +public abstract class LogicalRelation extends LogicalLeaf implements Scan { protected final Table table; protected final List<String> qualifier; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalAggregate.java index 9b1fff74b3..2dfc493bd3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalAggregate.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.plans.AggPhase; +import org.apache.doris.nereids.trees.plans.Aggregate; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; @@ -37,7 +38,7 @@ import java.util.Optional; /** * Physical aggregation plan. */ -public class PhysicalAggregate<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> { +public class PhysicalAggregate<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> implements Aggregate { private final List<Expression> groupByExpressions; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFilter.java index 0a01c689cc..d7a60a8426 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFilter.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.trees.plans.physical; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.plans.Filter; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; @@ -34,7 +35,7 @@ import java.util.Optional; /** * Physical filter plan. */ -public class PhysicalFilter<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> { +public class PhysicalFilter<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> implements Filter { private final Expression predicates; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalProject.java index b99be40bed..7f4c2151f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalProject.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.Project; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import com.google.common.base.Preconditions; @@ -35,7 +36,7 @@ import java.util.Optional; /** * Physical project plan. */ -public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> { +public class PhysicalProject<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> implements Project { private final List<NamedExpression> projects; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRelation.java index cce3e3a5e9..485e5d926c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalRelation.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.Scan; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import com.google.common.collect.ImmutableList; @@ -32,7 +33,7 @@ import java.util.Optional; /** * Abstract class for all physical scan plan. */ -public abstract class PhysicalRelation extends PhysicalLeaf { +public abstract class PhysicalRelation extends PhysicalLeaf implements Scan { protected final List<String> qualifier; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index cc5dc7d199..7ad5f43bee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -59,8 +59,8 @@ public abstract class PlanVisitor<R, C> { } - public R visitLogicalAggregate(LogicalAggregate<Plan> relation, C context) { - return visit(relation, context); + public R visitLogicalAggregate(LogicalAggregate<Plan> aggregate, C context) { + return visit(aggregate, context); } public R visitLogicalFilter(LogicalFilter<Plan> filter, C context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java index b1ef6e56ca..2500283e44 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java @@ -38,6 +38,50 @@ public class Utils { ? part : part.replace("`", "``"); } + /** + * Helper function to eliminate unnecessary checked exception caught requirement from the main logic of translator. + * + * @param f function which would invoke the logic of + * stale code from old optimizer that could throw + * a checked exception + */ + public static void execWithUncheckedException(FuncWrapper f) { + try { + f.exec(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + /** + * Helper function to eliminate unnecessary checked exception caught requirement from the main logic of translator. + * + */ + @SuppressWarnings("unchecked") + public static <R> R execWithReturnVal(Supplier<R> f) { + final Object[] ans = new Object[]{null}; + try { + ans[0] = f.get(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } + return (R) ans[0]; + } + + /** + * Wrapper to a function without return value. + */ + public interface FuncWrapper { + void exec() throws Exception; + } + + /** + * Wrapper to a funciton with return value. + */ + public interface Supplier<R> { + R get() throws Exception; + } + /** * Fully qualified identifier name parts, i.e., concat qualifier and name into a list. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5301d3ee2f..bccfed14b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -92,6 +92,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_COST_BASED_JOIN_REORDER = "enable_cost_based_join_reorder"; + public static final String ENABLE_NEREIDS_CBO = "enable_nereids_cbo"; + public static final int MIN_EXEC_INSTANCE_NUM = 1; public static final int MAX_EXEC_INSTANCE_NUM = 32; // if set to true, some of stmt will be forwarded to master FE to get result @@ -431,6 +433,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_COST_BASED_JOIN_REORDER) private boolean enableJoinReorderBasedCost = false; + @VariableMgr.VarAttr(name = ENABLE_NEREIDS_CBO) + private boolean enableNereidsCBO = false; + @VariableMgr.VarAttr(name = ENABLE_FOLD_CONSTANT_BY_BE) private boolean enableFoldConstantByBe = false; @@ -979,6 +984,14 @@ public class SessionVariable implements Serializable, Writable { this.enableJoinReorderBasedCost = enableJoinReorderBasedCost; } + public boolean isEnableNereidsCBO() { + return enableNereidsCBO; + } + + public void setEnableNereidsCBO(boolean enableNereidsCBO) { + this.enableNereidsCBO = enableNereidsCBO; + } + public void setDisableJoinReorder(boolean disableJoinReorder) { this.disableJoinReorder = disableJoinReorder; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java index a26ae7b0b0..85d3859b7a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -155,7 +155,7 @@ public class BaseStatsDerive { protected HashMap<Id, Float> deriveColumnToDataSize() { HashMap<Id, Float> columnToDataSize = new HashMap<>(); for (StatsDeriveResult child : childrenStatsResult) { - columnToDataSize.putAll(child.getColumnToDataSize()); + columnToDataSize.putAll(child.getColumnIdToDataSize()); } return columnToDataSize; } @@ -163,7 +163,7 @@ public class BaseStatsDerive { protected HashMap<Id, Long> deriveColumnToNdv() { HashMap<Id, Long> columnToNdv = new HashMap<>(); for (StatsDeriveResult child : childrenStatsResult) { - columnToNdv.putAll(child.getColumnToNdv()); + columnToNdv.putAll(child.getColumnIdToNdv()); } return columnToNdv; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java index e684156279..f6e990d507 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java @@ -76,6 +76,22 @@ public class ColumnStats { private LiteralExpr minValue; private LiteralExpr maxValue; + public ColumnStats(ColumnStats other) { + this.ndv = other.ndv; + this.avgSize = other.avgSize; + this.maxSize = other.maxSize; + this.numNulls = other.numNulls; + if (other.minValue != null) { + this.minValue = (LiteralExpr) other.minValue.clone(); + } + if (other.maxValue != null) { + this.maxValue = (LiteralExpr) other.maxValue.clone(); + } + } + + public ColumnStats() { + } + public long getNdv() { return ndv; } @@ -224,4 +240,14 @@ public class ColumnStats { throw new AnalysisException("Unsupported setting this type: " + type + " of min max value"); } } + + public ColumnStats copy() { + return new ColumnStats(this); + } + + public ColumnStats multiplyDouble(double selectivity) { + ndv *= selectivity; + numNulls *= selectivity; + return this; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index eb02e6e036..5066706924 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -174,4 +174,9 @@ public class Statistics { tableStats.setRowCount(rowCount); } } + + // Used for unit test + public void putTableStats(long id, TableStats tableStats) { + this.idToTableStats.put(id, tableStats); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java index 4f2abb5691..470c327c73 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -18,9 +18,11 @@ package org.apache.doris.statistics; import org.apache.doris.common.Id; +import org.apache.doris.nereids.trees.expressions.Slot; import com.google.common.collect.Maps; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -32,19 +34,36 @@ public class StatsDeriveResult { private long rowCount = -1; // The data size of the corresponding column in the operator // The actual key is slotId - private final Map<Id, Float> columnToDataSize = Maps.newHashMap(); + private final Map<Id, Float> columnIdToDataSize = Maps.newHashMap(); // The ndv of the corresponding column in the operator // The actual key is slotId - private final Map<Id, Long> columnToNdv = Maps.newHashMap(); + private final Map<Id, Long> columnIdToNdv = Maps.newHashMap(); - public StatsDeriveResult(long rowCount, Map<Id, Float> columnToDataSize, Map<Id, Long> columnToNdv) { + private Map<Slot, ColumnStats> slotToColumnStats; + + public StatsDeriveResult(long rowCount, Map<Slot, ColumnStats> slotToColumnStats) { this.rowCount = rowCount; - this.columnToDataSize.putAll(columnToDataSize); - this.columnToNdv.putAll(columnToNdv); + this.slotToColumnStats = slotToColumnStats; + } + + public StatsDeriveResult(long rowCount, Map<Id, Float> columnIdToDataSize, Map<Id, Long> columnIdToNdv) { + this.rowCount = rowCount; + this.columnIdToDataSize.putAll(columnIdToDataSize); + this.columnIdToNdv.putAll(columnIdToNdv); + } + + public StatsDeriveResult(StatsDeriveResult another) { + this.rowCount = another.rowCount; + this.columnIdToDataSize.putAll(another.columnIdToDataSize); + this.columnIdToNdv.putAll(another.columnIdToNdv); + slotToColumnStats = new HashMap<>(); + for (Entry<Slot, ColumnStats> entry : another.slotToColumnStats.entrySet()) { + slotToColumnStats.put(entry.getKey(), entry.getValue().copy()); + } } public float computeSize() { - return Math.max(1, columnToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount; + return Math.max(1, columnIdToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount; } /** @@ -57,7 +76,7 @@ public class StatsDeriveResult { float count = 0; boolean exist = false; - for (Entry<Id, Float> entry : columnToDataSize.entrySet()) { + for (Entry<Id, Float> entry : columnIdToDataSize.entrySet()) { if (slotIds.contains(entry.getKey())) { count += entry.getValue(); exist = true; @@ -77,11 +96,38 @@ public class StatsDeriveResult { return rowCount; } - public Map<Id, Long> getColumnToNdv() { - return columnToNdv; + public Map<Id, Long> getColumnIdToNdv() { + return columnIdToNdv; + } + + public Map<Id, Float> getColumnIdToDataSize() { + return columnIdToDataSize; + } + + public Map<Slot, ColumnStats> getSlotToColumnStats() { + return slotToColumnStats; + } + + public void setSlotToColumnStats(Map<Slot, ColumnStats> slotToColumnStats) { + this.slotToColumnStats = slotToColumnStats; + } + + public StatsDeriveResult multiplyDouble(double selectivity) { + rowCount *= selectivity; + for (Entry<Slot, ColumnStats> entry : slotToColumnStats.entrySet()) { + entry.getValue().multiplyDouble(selectivity); + } + return this; + } + + public StatsDeriveResult merge(StatsDeriveResult other) { + for (Entry<Slot, ColumnStats> entry : other.getSlotToColumnStats().entrySet()) { + this.slotToColumnStats.put(entry.getKey(), entry.getValue().copy()); + } + return this; } - public Map<Id, Float> getColumnToDataSize() { - return columnToDataSize; + public StatsDeriveResult copy() { + return new StatsDeriveResult(this); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java index 96d83cb61d..abc977c292 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java @@ -116,7 +116,7 @@ public class TableStats { public void updateColumnStats(String columnName, Type columnType, Map<StatsType, String> statsTypeToValue) throws AnalysisException { - ColumnStats columnStats = getNotNullColumnStats(columnName); + ColumnStats columnStats = getColumnStats(columnName); columnStats.updateStats(columnType, statsTypeToValue); } @@ -141,7 +141,7 @@ public class TableStats { * @param columnName column name * @return @ColumnStats */ - public ColumnStats getNotNullColumnStats(String columnName) { + public ColumnStats getColumnStats(String columnName) { ColumnStats columnStats = nameToColumnStats.get(columnName); if (columnStats == null) { columnStats = new ColumnStats(); @@ -247,4 +247,11 @@ public class TableStats { } } } + + /** + * This method is for unit test. + */ + public void putColumnStats(String name, ColumnStats columnStats) { + nameToColumnStats.put(name, columnStats); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java new file mode 100644 index 0000000000..84cce97d19 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.jobs.cascades; + +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf.TableType; +import org.apache.doris.nereids.PlannerContext; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.memo.Memo; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.ColumnStats; +import org.apache.doris.statistics.Statistics; +import org.apache.doris.statistics.StatisticsManager; +import org.apache.doris.statistics.StatsDeriveResult; +import org.apache.doris.statistics.TableStats; + +import com.google.common.base.Supplier; +import mockit.Expectations; +import mockit.Mocked; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +public class DeriveStatsJobTest { + + @Mocked + ConnectContext context; + @Mocked + Env env; + @Mocked + StatisticsManager statisticsManager; + + SlotReference slot1; + + @Test + public void testExecute() throws Exception { + LogicalOlapScan olapScan = constructOlapSCan(); + LogicalAggregate agg = constructAgg(olapScan); + Memo memo = new Memo(agg); + PlannerContext plannerContext = new PlannerContext(memo, context); + new DeriveStatsJob(memo.getRoot().getLogicalExpression(), + new JobContext(plannerContext, null, Double.MAX_VALUE)).execute(); + while (!plannerContext.getJobPool().isEmpty()) { + plannerContext.getJobPool().pop().execute(); + } + StatsDeriveResult statistics = memo.getRoot().getStatistics(); + Assert.assertNotNull(statistics); + Assert.assertEquals(10, statistics.getRowCount()); + } + + private LogicalOlapScan constructOlapSCan() { + ColumnStats columnStats1 = new ColumnStats(); + columnStats1.setNdv(10); + columnStats1.setNumNulls(5); + long tableId1 = 0; + String tableName1 = "t1"; + TableStats tableStats1 = new TableStats(); + tableStats1.putColumnStats("c1", columnStats1); + Statistics statistics = new Statistics(); + statistics.putTableStats(tableId1, tableStats1); + List<String> qualifier = new ArrayList<>(); + qualifier.add("test"); + qualifier.add("t"); + slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); + new Expectations() {{ + ConnectContext.get(); + result = context; + context.getEnv(); + result = env; + env.getStatisticsManager(); + result = statisticsManager; + statisticsManager.getStatistics(); + result = statistics; + }}; + + Table table1 = new Table(tableId1, tableName1, TableType.OLAP, Collections.emptyList()); + LogicalOlapScan logicalOlapScan1 = new LogicalOlapScan(table1, Collections.emptyList()).withLogicalProperties( + Optional.of(new LogicalProperties(new Supplier<List<Slot>>() { + @Override + public List<Slot> get() { + return Arrays.asList(slot1); + } + }))); + return logicalOlapScan1; + } + + private LogicalAggregate constructAgg(Plan child) { + List<Expression> groupByExprList = new ArrayList<>(); + groupByExprList.add(slot1); + AggregateFunction sum = new Sum(slot1); + Alias alias = new Alias(sum, "a"); + LogicalAggregate logicalAggregate = new LogicalAggregate(groupByExprList, Arrays.asList(alias), child); + return logicalAggregate; + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/plan/TestPlanOutput.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/plan/TestPlanOutput.java index aac8bca61b..d6c37ae7a6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/plan/TestPlanOutput.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/plan/TestPlanOutput.java @@ -104,6 +104,11 @@ public class TestPlanOutput { public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) { return null; } + + @Override + public Table getTable() { + return null; + } }; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java new file mode 100644 index 0000000000..ee94ae29f9 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -0,0 +1,244 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.stats; + +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf.TableType; +import org.apache.doris.nereids.memo.Group; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.And; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.IntegerLiteral; +import org.apache.doris.nereids.trees.expressions.Or; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.Sum; +import org.apache.doris.nereids.trees.plans.GroupPlan; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.ColumnStats; +import org.apache.doris.statistics.Statistics; +import org.apache.doris.statistics.StatisticsManager; +import org.apache.doris.statistics.StatsDeriveResult; +import org.apache.doris.statistics.TableStats; + +import com.google.common.base.Supplier; +import mockit.Expectations; +import mockit.Mocked; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +public class StatsCalculatorTest { + + @Mocked + ConnectContext context; + @Mocked + Env env; + @Mocked + StatisticsManager statisticsManager; + + @Test + public void testAgg() { + List<String> qualifier = new ArrayList<>(); + qualifier.add("test"); + qualifier.add("t"); + SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); + SlotReference slot2 = new SlotReference("c2", IntegerType.INSTANCE, true, qualifier); + ColumnStats columnStats1 = new ColumnStats(); + columnStats1.setNdv(10); + columnStats1.setNumNulls(5); + ColumnStats columnStats2 = new ColumnStats(); + columnStats2.setNdv(20); + columnStats1.setNumNulls(10); + Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>(); + slotColumnStatsMap.put(slot1, columnStats1); + slotColumnStatsMap.put(slot2, columnStats2); + List<Expression> groupByExprList = new ArrayList<>(); + groupByExprList.add(slot1); + AggregateFunction sum = new Sum(slot2); + StatsDeriveResult childStats = new StatsDeriveResult(20, slotColumnStatsMap); + Alias alias = new Alias(sum, "a"); + Group childGroup = new Group(); + childGroup.setLogicalProperties(new LogicalProperties(new Supplier<List<Slot>>() { + @Override + public List<Slot> get() { + return Collections.emptyList(); + } + })); + GroupPlan groupPlan = new GroupPlan(childGroup); + childGroup.setStatistics(childStats); + LogicalAggregate logicalAggregate = new LogicalAggregate(groupByExprList, Arrays.asList(alias), groupPlan); + GroupExpression groupExpression = new GroupExpression(logicalAggregate, Arrays.asList(childGroup)); + Group ownerGroup = new Group(); + groupExpression.setOwnerGroup(ownerGroup); + StatsCalculator statsCalculator = new StatsCalculator(groupExpression); + statsCalculator.estimate(); + Assert.assertEquals(groupExpression.getOwnerGroup().getStatistics().getRowCount(), 10); + } + + @Test + public void testFilter() { + List<String> qualifier = new ArrayList<>(); + qualifier.add("test"); + qualifier.add("t"); + SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); + SlotReference slot2 = new SlotReference("c2", IntegerType.INSTANCE, true, qualifier); + ColumnStats columnStats1 = new ColumnStats(); + columnStats1.setNdv(10); + columnStats1.setNumNulls(5); + ColumnStats columnStats2 = new ColumnStats(); + columnStats2.setNdv(20); + columnStats1.setNumNulls(10); + Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>(); + slotColumnStatsMap.put(slot1, columnStats1); + slotColumnStatsMap.put(slot2, columnStats2); + StatsDeriveResult childStats = new StatsDeriveResult(10000, slotColumnStatsMap); + + EqualTo eq1 = new EqualTo(slot1, new IntegerLiteral(1)); + EqualTo eq2 = new EqualTo(slot2, new IntegerLiteral(2)); + + And and = new And(eq1, eq2); + Or or = new Or(eq1, eq2); + + Group childGroup = new Group(); + childGroup.setLogicalProperties(new LogicalProperties(new Supplier<List<Slot>>() { + @Override + public List<Slot> get() { + return Collections.emptyList(); + } + })); + GroupPlan groupPlan = new GroupPlan(childGroup); + childGroup.setStatistics(childStats); + + LogicalFilter logicalFilter = new LogicalFilter(and, groupPlan); + GroupExpression groupExpression = new GroupExpression(logicalFilter); + groupExpression.addChild(childGroup); + Group ownerGroup = new Group(); + groupExpression.setOwnerGroup(ownerGroup); + StatsCalculator statsCalculator = new StatsCalculator(groupExpression); + statsCalculator.estimate(); + Assert.assertEquals((long) (10000 * 0.1 * 0.05), ownerGroup.getStatistics().getRowCount(), 0.001); + + LogicalFilter logicalFilterOr = new LogicalFilter(or, groupPlan); + GroupExpression groupExpressionOr = new GroupExpression(logicalFilterOr); + groupExpressionOr.addChild(childGroup); + Group ownerGroupOr = new Group(); + groupExpressionOr.setOwnerGroup(ownerGroupOr); + StatsCalculator statsCalculator2 = new StatsCalculator(groupExpressionOr); + statsCalculator2.estimate(); + Assert.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)), + ownerGroupOr.getStatistics().getRowCount(), 0.001); + } + + @Test + public void testHashJoin() { + List<String> qualifier = new ArrayList<>(); + qualifier.add("test"); + qualifier.add("t"); + SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); + SlotReference slot2 = new SlotReference("c2", IntegerType.INSTANCE, true, qualifier); + ColumnStats columnStats1 = new ColumnStats(); + columnStats1.setNdv(10); + columnStats1.setNumNulls(5); + ColumnStats columnStats2 = new ColumnStats(); + columnStats2.setNdv(20); + columnStats1.setNumNulls(10); + Map<Slot, ColumnStats> slotColumnStatsMap1 = new HashMap<>(); + slotColumnStatsMap1.put(slot1, columnStats1); + + Map<Slot, ColumnStats> slotColumnStatsMap2 = new HashMap<>(); + slotColumnStatsMap2.put(slot2, columnStats2); + + final long leftRowCount = 5000; + StatsDeriveResult leftStats = new StatsDeriveResult(leftRowCount, slotColumnStatsMap1); + + final long rightRowCount = 10000; + StatsDeriveResult rightStats = new StatsDeriveResult(rightRowCount, slotColumnStatsMap2); + + EqualTo equalTo = new EqualTo(slot1, slot2); + StatsDeriveResult semiJoinStats = JoinEstimation.estimate(leftStats, + rightStats, equalTo, JoinType.LEFT_SEMI_JOIN); + Assert.assertEquals(leftRowCount, semiJoinStats.getRowCount()); + StatsDeriveResult innerJoinStats = JoinEstimation.estimate(leftStats, + rightStats, equalTo, JoinType.INNER_JOIN); + Assert.assertEquals(2500000, innerJoinStats.getRowCount()); + } + + @Test + public void testOlapScan() { + ColumnStats columnStats1 = new ColumnStats(); + columnStats1.setNdv(10); + columnStats1.setNumNulls(5); + long tableId1 = 0; + String tableName1 = "t1"; + TableStats tableStats1 = new TableStats(); + tableStats1.putColumnStats("c1", columnStats1); + Statistics statistics = new Statistics(); + statistics.putTableStats(tableId1, tableStats1); + List<String> qualifier = new ArrayList<>(); + qualifier.add("test"); + qualifier.add("t"); + SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier); + new Expectations() {{ + ConnectContext.get(); + result = context; + context.getEnv(); + result = env; + env.getStatisticsManager(); + result = statisticsManager; + statisticsManager.getStatistics(); + result = statistics; + }}; + + Table table1 = new Table(tableId1, tableName1, TableType.OLAP, Collections.emptyList()); + LogicalOlapScan logicalOlapScan1 = new LogicalOlapScan(table1, Collections.emptyList()).withLogicalProperties( + Optional.of(new LogicalProperties(new Supplier<List<Slot>>() { + @Override + public List<Slot> get() { + return Arrays.asList(slot1); + } + }))); + Group childGroup = new Group(); + GroupExpression groupExpression = new GroupExpression(logicalOlapScan1, Arrays.asList(childGroup)); + Group ownerGroup = new Group(); + groupExpression.setOwnerGroup(ownerGroup); + StatsCalculator statsCalculator = new StatsCalculator(groupExpression); + statsCalculator.estimate(); + StatsDeriveResult stats = ownerGroup.getStatistics(); + Assert.assertEquals(1, stats.getSlotToColumnStats().size()); + Assert.assertNotNull(stats.getSlotToColumnStats().get(slot1)); + } + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org