This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new ff786d05fb4 [pick](Branch2.0) generate left deep tree when stats is unknown (#25702) ff786d05fb4 is described below commit ff786d05fb4d05aee444a267883fb7998f8ea91f Author: 谢健 <jianx...@gmail.com> AuthorDate: Sun Oct 22 00:46:43 2023 +0800 [pick](Branch2.0) generate left deep tree when stats is unknown (#25702) --- .../org/apache/doris/nereids/cost/CostModelV1.java | 43 +++++++++++++++++++++- .../apache/doris/nereids/stats/JoinEstimation.java | 17 ++++++++- .../trees/plans/physical/PhysicalHashJoin.java | 8 ++++ .../plans/physical/PhysicalNestedLoopJoin.java | 8 ++++ 4 files changed, 74 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index aa8f4d6cc7c..2aca6017b7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.DistributionSpec; import org.apache.doris.nereids.properties.DistributionSpecGather; import org.apache.doris.nereids.properties.DistributionSpecHash; import org.apache.doris.nereids.properties.DistributionSpecReplicated; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows; import org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan; @@ -311,17 +312,53 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> { } // TODO: since the outputs rows may expand a lot, penalty on it will cause bc never be chosen. // will refine this in next generation cost model. + if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) { + // forbid broadcast join when stats is unknown + return CostV1.of(rightRowCount * buildSideFactor + 1 / leftRowCount, + rightRowCount, + 0 + ); + } return CostV1.of(leftRowCount + rightRowCount * buildSideFactor + outputRowCount * probeSideFactor, rightRowCount, 0 ); } + if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) { + return CostV1.of(rightRowCount + 1 / leftRowCount, + rightRowCount, + 0); + } return CostV1.of(leftRowCount + rightRowCount + outputRowCount, rightRowCount, 0 ); } + private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends Plan> join, + Statistics build, Statistics probe) { + for (Slot slot : join.getConditionSlot()) { + if ((build.columnStatistics().containsKey(slot) && !build.columnStatistics().get(slot).isUnKnown) + || (probe.columnStatistics().containsKey(slot) && !probe.columnStatistics().get(slot).isUnKnown)) { + continue; + } + return true; + } + return false; + } + + private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> join, + Statistics build, Statistics probe) { + for (Slot slot : join.getConditionSlot()) { + if ((build.columnStatistics().containsKey(slot) && !build.columnStatistics().get(slot).isUnKnown) + || (probe.columnStatistics().containsKey(slot) && !probe.columnStatistics().get(slot).isUnKnown)) { + continue; + } + return true; + } + return false; + } + @Override public Cost visitPhysicalNestedLoopJoin( PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> nestedLoopJoin, @@ -330,7 +367,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> { Preconditions.checkState(context.arity() == 2); Statistics leftStatistics = context.getChildStatistics(0); Statistics rightStatistics = context.getChildStatistics(1); - + if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) { + return CostV1.of(rightStatistics.getRowCount() + 1 / leftStatistics.getRowCount(), + rightStatistics.getRowCount(), + 0); + } return CostV1.of( leftStatistics.getRowCount() * rightStatistics.getRowCount(), rightStatistics.getRowCount(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index ef4575e3308..0498d68d793 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -147,6 +147,21 @@ public class JoinEstimation { } private static Statistics estimateNestLoopJoin(Statistics leftStats, Statistics rightStats, Join join) { + if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { + double rowCount = (leftStats.getRowCount() + rightStats.getRowCount()); + // We do more like the nested loop join with one rows than inner join + if (leftStats.getRowCount() == 1 || rightStats.getRowCount() == 1) { + rowCount *= 0.99; + } else { + rowCount *= 1.01; + } + rowCount = Math.max(1, rowCount); + return new StatisticsBuilder() + .setRowCount(rowCount) + .putColumnStatistics(leftStats.columnStatistics()) + .putColumnStatistics(rightStats.columnStatistics()) + .build(); + } return new StatisticsBuilder() .setRowCount(Math.max(1, leftStats.getRowCount() * rightStats.getRowCount())) .putColumnStatistics(leftStats.columnStatistics()) @@ -156,7 +171,7 @@ public class JoinEstimation { private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) { if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { - double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); + double rowCount = leftStats.getRowCount() + rightStats.getRowCount(); rowCount = Math.max(1, rowCount); return new StatisticsBuilder() .setRowCount(rowCount) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java index 84dc09b2a37..357667f748c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.AbstractPlan; import org.apache.doris.nereids.trees.plans.JoinHint; import org.apache.doris.nereids.trees.plans.JoinType; @@ -34,6 +35,7 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import java.util.Comparator; @@ -41,6 +43,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Physical hash join plan. @@ -215,6 +218,11 @@ public class PhysicalHashJoin< } } + public Set<Slot> getConditionSlot() { + return Stream.concat(hashJoinConjuncts.stream(), otherJoinConjuncts.stream()) + .flatMap(expr -> expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet()); + } + @Override public String shapeInfo() { StringBuilder builder = new StringBuilder(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java index 3b92830b6a0..76b4859fb13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.JoinHint; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; @@ -31,11 +32,13 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.stream.Stream; /** * Use nested loop algorithm to do join. @@ -169,6 +172,11 @@ public class PhysicalNestedLoopJoin< return bitMapRuntimeFilterConditions.isEmpty(); } + public Set<Slot> getConditionSlot() { + return Stream.concat(hashJoinConjuncts.stream(), otherJoinConjuncts.stream()) + .flatMap(expr -> expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet()); + } + @Override public String shapeInfo() { StringBuilder builder = new StringBuilder("NestedLoopJoin"); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org