This is an automated email from the ASF dual-hosted git repository. jakevin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 861d9c985c [refactor](Nereids): refactor Join Reorder Rule. (#17809) 861d9c985c is described below commit 861d9c985c49c6ac158dc25920e17a90d9dd5002 Author: jakevin <jakevin...@gmail.com> AuthorDate: Tue Mar 21 16:12:07 2023 +0800 [refactor](Nereids): refactor Join Reorder Rule. (#17809) --- .../org/apache/doris/nereids/cost/CostWeight.java | 2 +- .../nereids/jobs/cascades/CostAndEnforcerJob.java | 8 ++--- .../org/apache/doris/nereids/rules/RuleSet.java | 14 ++++---- .../exploration/join/InnerJoinLAsscomProject.java | 19 +++++----- .../rules/exploration/join/JoinReorderUtils.java | 42 +++++----------------- .../exploration/join/OuterJoinAssocProject.java | 6 ++-- .../exploration/join/OuterJoinLAsscomProject.java | 6 ++-- .../join/PushdownProjectThroughInnerJoin.java | 16 ++++++--- 8 files changed, 48 insertions(+), 65 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java index a21febefa6..d09c72f6dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostWeight.java @@ -47,7 +47,7 @@ public class CostWeight { * Except stats information, there are some special criteria in doris. * For example, in hash join cluster, BE could build hash tables * in parallel for left deep tree. And hence, we need to punish right deep tree. - * penalyWeight is the factor of punishment. + * penaltyWeight is the factor of punishment. * The punishment is denoted by stats.penalty. */ final double penaltyWeight; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java index 51854cd072..c2cdddb23c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java @@ -57,20 +57,20 @@ public class CostAndEnforcerJob extends Job implements Cloneable { // List of request property to children // Example: Physical Hash Join - // [ child item: [leftProperties, rightPropertie]] + // [ child item: [leftProperties, rightProperties]] // [ [Properties {"", ANY}, Properties {"", BROADCAST}], // [Properties {"", SHUFFLE_JOIN}, Properties {"", SHUFFLE_JOIN}]] private List<List<PhysicalProperties>> requestChildrenPropertiesList; - private List<List<PhysicalProperties>> outputChildrenPropertiesList = new ArrayList<>(); + private final List<List<PhysicalProperties>> outputChildrenPropertiesList = new ArrayList<>(); // index of List<request property to children> private int requestPropertiesIndex = 0; private final List<GroupExpression> lowestCostChildren = Lists.newArrayList(); - // current child index of travsing all children + // current child index of traversing all children private int curChildIndex = -1; - // child index in the last time of travsing all children + // child index in the last time of traversing all children private int prevChildIndex = -1; public CostAndEnforcerJob(GroupExpression groupExpression, JobContext context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java index 83544b96ff..11b98fe257 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java @@ -22,6 +22,8 @@ import org.apache.doris.nereids.rules.exploration.join.InnerJoinLAsscomProject; import org.apache.doris.nereids.rules.exploration.join.JoinCommute; import org.apache.doris.nereids.rules.exploration.join.OuterJoinLAsscom; import org.apache.doris.nereids.rules.exploration.join.OuterJoinLAsscomProject; +import org.apache.doris.nereids.rules.exploration.join.PushdownProjectThroughInnerJoin; +import org.apache.doris.nereids.rules.exploration.join.PushdownProjectThroughSemiJoin; import org.apache.doris.nereids.rules.exploration.join.SemiJoinLogicalJoinTranspose; import org.apache.doris.nereids.rules.exploration.join.SemiJoinLogicalJoinTransposeProject; import org.apache.doris.nereids.rules.exploration.join.SemiJoinSemiJoinTranspose; @@ -86,12 +88,14 @@ public class RuleSet { .add(SemiJoinLogicalJoinTransposeProject.LEFT_DEEP) .add(SemiJoinSemiJoinTranspose.INSTANCE) .add(SemiJoinSemiJoinTransposeProject.INSTANCE) + .add(PushdownProjectThroughInnerJoin.INSTANCE) + .add(PushdownProjectThroughSemiJoin.INSTANCE) .build(); /** * This rule need to be shadow in DPHyp */ - public static final List<Rule> JOINORDER_RULE = planRuleFactories() + public static final List<Rule> JOIN_REORDER_RULE = planRuleFactories() .add(JoinCommute.ZIG_ZAG) .add(InnerJoinLAsscom.INSTANCE) .add(InnerJoinLAsscomProject.INSTANCE) @@ -175,20 +179,18 @@ public class RuleSet { public List<Rule> getExplorationRules() { List<Rule> rules = new ArrayList<>(); - rules.addAll(JOINORDER_RULE); + rules.addAll(JOIN_REORDER_RULE); rules.addAll(OTHER_REORDER_RULES); rules.addAll(EXPLORATION_RULES); return rules; } public List<Rule> getExplorationRulesWithoutReorder() { - List<Rule> rules = new ArrayList<>(); - rules.addAll(EXPLORATION_RULES); - return rules; + return new ArrayList<>(EXPLORATION_RULES); } public List<Rule> getJoinOrderRule() { - return JOINORDER_RULE; + return JOIN_REORDER_RULE; } public List<Rule> getImplementationRules() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java index b48e429ccf..b55c135dee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/InnerJoinLAsscomProject.java @@ -65,10 +65,11 @@ public class InnerJoinLAsscomProject extends OneExplorationRuleFactory { GroupPlan a = bottomJoin.left(); GroupPlan b = bottomJoin.right(); GroupPlan c = topJoin.right(); + Set<ExprId> bExprIdSet = b.getOutputExprIdSet(); Set<ExprId> cExprIdSet = c.getOutputExprIdSet(); /* ********** Split projects ********** */ - Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProjection(projects, b); + Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProject(projects, bExprIdSet); List<NamedExpression> aProjects = map.get(false); List<NamedExpression> bProjects = map.get(true); if (aProjects.isEmpty()) { @@ -76,14 +77,13 @@ public class InnerJoinLAsscomProject extends OneExplorationRuleFactory { } /* ********** split HashConjuncts ********** */ - Set<ExprId> bExprIdSet = JoinReorderUtils.combineProjectAndChildExprId(b, bProjects); - Map<Boolean, List<Expression>> splitHashConjuncts = splitConjunctsWithAlias( + Map<Boolean, List<Expression>> splitHashConjuncts = splitConjuncts( topJoin.getHashJoinConjuncts(), bottomJoin.getHashJoinConjuncts(), bExprIdSet); List<Expression> newTopHashConjuncts = splitHashConjuncts.get(true); List<Expression> newBottomHashConjuncts = splitHashConjuncts.get(false); /* ********** split OtherConjuncts ********** */ - Map<Boolean, List<Expression>> splitOtherConjuncts = splitConjunctsWithAlias( + Map<Boolean, List<Expression>> splitOtherConjuncts = splitConjuncts( topJoin.getOtherJoinConjuncts(), bottomJoin.getOtherJoinConjuncts(), bExprIdSet); List<Expression> newTopOtherConjuncts = splitOtherConjuncts.get(true); List<Expression> newBottomOtherConjuncts = splitOtherConjuncts.get(false); @@ -93,16 +93,15 @@ public class InnerJoinLAsscomProject extends OneExplorationRuleFactory { } // Add all slots used by OnCondition when projects not empty. - Set<ExprId> aExprIdSet = JoinReorderUtils.combineProjectAndChildExprId(a, aProjects); Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat( newTopHashConjuncts.stream(), newTopOtherConjuncts.stream()) .flatMap(onExpr -> onExpr.getInputSlots().stream()) .filter(slot -> !cExprIdSet.contains(slot.getExprId())) .collect(Collectors.partitioningBy( - slot -> aExprIdSet.contains(slot.getExprId()), Collectors.toSet())); - JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true), aProjects); - JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects); + slot -> bExprIdSet.contains(slot.getExprId()), Collectors.toSet())); + JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), aProjects); + JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true), bProjects); aProjects.addAll(c.getOutput()); @@ -130,14 +129,14 @@ public class InnerJoinLAsscomProject extends OneExplorationRuleFactory { * True: contains B. * False: just contains A C. */ - private Map<Boolean, List<Expression>> splitConjunctsWithAlias(List<Expression> topConjuncts, + private Map<Boolean, List<Expression>> splitConjuncts(List<Expression> topConjuncts, List<Expression> bottomConjuncts, Set<ExprId> bExprIdSet) { // top: (A B)(error) (A C) (B C) (A B C) // Split topJoin Condition to two part according to include B. Map<Boolean, List<Expression>> splitOn = topConjuncts.stream() .collect(Collectors.partitioningBy(topHashOn -> { Set<ExprId> usedExprIds = topHashOn.getInputSlotExprIds(); - return Utils.isIntersecting(bExprIdSet, usedExprIds); + return Utils.isIntersecting(usedExprIds, bExprIdSet); })); // * don't include B, just include (A C) // we add it into newBottomJoin HashConjuncts. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java index 36c71cf01f..916ccdc6bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinReorderUtils.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.rules.exploration.join; import org.apache.doris.nereids.trees.expressions.ExprId; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.GroupPlan; @@ -26,13 +25,10 @@ import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; -import com.google.common.collect.ImmutableList; - import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.Stream; /** * Common @@ -42,22 +38,19 @@ class JoinReorderUtils { return project.getProjects().stream().allMatch(expr -> expr instanceof Slot); } - static Map<Boolean, List<NamedExpression>> splitProjection(List<NamedExpression> projects, Plan splitChild) { - Set<ExprId> splitExprIds = splitChild.getOutputExprIdSet(); - + /** + * Split project according to whether namedExpr contains by splitChildExprIds. + * Notice: projects must all be Slot. + */ + static Map<Boolean, List<NamedExpression>> splitProject(List<NamedExpression> projects, + Set<ExprId> splitChildExprIds) { return projects.stream() - .collect(Collectors.partitioningBy(projectExpr -> { - Set<ExprId> usedExprIds = projectExpr.getInputSlotExprIds(); - return splitExprIds.containsAll(usedExprIds); + .collect(Collectors.partitioningBy(expr -> { + Slot slot = (Slot) expr; + return splitChildExprIds.contains(slot.getExprId()); })); } - public static Set<ExprId> combineProjectAndChildExprId(Plan b, List<NamedExpression> bProject) { - return Stream.concat( - b.getOutput().stream().map(NamedExpression::getExprId), - bProject.stream().map(NamedExpression::getExprId)).collect(Collectors.toSet()); - } - /** * If projectExprs is empty or project output equal plan output, return the original plan. */ @@ -76,23 +69,6 @@ class JoinReorderUtils { return new LogicalProject<>(projectExprs, plan); } - /** - * replace JoinConjuncts by using slots map. - */ - public static List<Expression> replaceJoinConjuncts(List<Expression> joinConjuncts, - Map<ExprId, Slot> replaceMaps) { - return joinConjuncts.stream() - .map(expr -> - expr.rewriteUp(e -> { - if (e instanceof Slot && replaceMaps.containsKey(((Slot) e).getExprId())) { - return replaceMaps.get(((Slot) e).getExprId()); - } else { - return e; - } - }) - ).collect(ImmutableList.toImmutableList()); - } - /** * When project not empty, we add all slots used by hashOnCondition into projects. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java index 06421c9c56..c86361f231 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinAssocProject.java @@ -66,9 +66,10 @@ public class OuterJoinAssocProject extends OneExplorationRuleFactory { GroupPlan a = bottomJoin.left(); GroupPlan b = bottomJoin.right(); GroupPlan c = topJoin.right(); + Set<ExprId> aOutputExprIds = a.getOutputExprIdSet(); /* ********** Split projects ********** */ - Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProjection(projects, a); + Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProject(projects, aOutputExprIds); List<NamedExpression> aProjects = map.get(true); List<NamedExpression> bProjects = map.get(false); if (bProjects.isEmpty()) { @@ -84,13 +85,12 @@ public class OuterJoinAssocProject extends OneExplorationRuleFactory { } // Add all slots used by OnCondition when projects not empty. - Set<ExprId> aExprIdSet = JoinReorderUtils.combineProjectAndChildExprId(a, aProjects); Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat( bottomJoin.getHashJoinConjuncts().stream(), bottomJoin.getHashJoinConjuncts().stream()) .flatMap(onExpr -> onExpr.getInputSlots().stream()) .collect(Collectors.partitioningBy( - slot -> aExprIdSet.contains(slot.getExprId()), Collectors.toSet())); + slot -> aOutputExprIds.contains(slot.getExprId()), Collectors.toSet())); JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true), aProjects); JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java index 2661947189..96e77d7ab9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/OuterJoinLAsscomProject.java @@ -68,9 +68,10 @@ public class OuterJoinLAsscomProject extends OneExplorationRuleFactory { GroupPlan a = bottomJoin.left(); GroupPlan b = bottomJoin.right(); GroupPlan c = topJoin.right(); + Set<ExprId> aOutputExprIds = a.getOutputExprIdSet(); /* ********** Split projects ********** */ - Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProjection(projects, a); + Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProject(projects, aOutputExprIds); List<NamedExpression> aProjects = map.get(true); if (aProjects.isEmpty()) { return null; @@ -86,13 +87,12 @@ public class OuterJoinLAsscomProject extends OneExplorationRuleFactory { } // Add all slots used by OnCondition when projects not empty. - Set<ExprId> aExprIdSet = JoinReorderUtils.combineProjectAndChildExprId(a, aProjects); Map<Boolean, Set<Slot>> abOnUsedSlots = Stream.concat( bottomJoin.getHashJoinConjuncts().stream(), bottomJoin.getHashJoinConjuncts().stream()) .flatMap(onExpr -> onExpr.getInputSlots().stream()) .collect(Collectors.partitioningBy( - slot -> aExprIdSet.contains(slot.getExprId()), Collectors.toSet())); + slot -> aOutputExprIds.contains(slot.getExprId()), Collectors.toSet())); JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(true), aProjects); JoinReorderUtils.addSlotsUsedByOn(abOnUsedSlots.get(false), bProjects); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java index c46f2252a2..6e9fa8f130 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerJoin.java @@ -32,7 +32,6 @@ import com.google.common.collect.ImmutableList.Builder; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -52,6 +51,7 @@ public class PushdownProjectThroughInnerJoin extends OneExplorationRuleFactory { @Override public Rule build() { return logicalProject(logicalJoin()) + .whenNot(JoinReorderUtils::isAllSlotProject) .when(project -> project.child().getJoinType().isInnerJoin()) .whenNot(project -> project.child().hasJoinHint()) .then(project -> { @@ -68,10 +68,16 @@ public class PushdownProjectThroughInnerJoin extends OneExplorationRuleFactory { return null; } - Map<Boolean, List<NamedExpression>> map = JoinReorderUtils.splitProjection(project.getProjects(), - join.left()); - List<NamedExpression> aProjects = map.get(true); - List<NamedExpression> bProjects = map.get(false); + List<NamedExpression> aProjects = new ArrayList<>(); + List<NamedExpression> bProjects = new ArrayList<>(); + for (NamedExpression namedExpression : project.getProjects()) { + Set<ExprId> usedExprIds = namedExpression.getInputSlotExprIds(); + if (aOutputExprIdSet.containsAll(usedExprIds)) { + aProjects.add(namedExpression); + } else { + bProjects.add(namedExpression); + } + } boolean leftContains = aProjects.stream().anyMatch(e -> !(e instanceof Slot)); boolean rightContains = bProjects.stream().anyMatch(e -> !(e instanceof Slot)); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org