This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new ff786d05fb4 [pick](Branch2.0) generate left deep tree when stats is 
unknown (#25702)
ff786d05fb4 is described below

commit ff786d05fb4d05aee444a267883fb7998f8ea91f
Author: 谢健 <jianx...@gmail.com>
AuthorDate: Sun Oct 22 00:46:43 2023 +0800

    [pick](Branch2.0) generate left deep tree when stats is unknown (#25702)
---
 .../org/apache/doris/nereids/cost/CostModelV1.java | 43 +++++++++++++++++++++-
 .../apache/doris/nereids/stats/JoinEstimation.java | 17 ++++++++-
 .../trees/plans/physical/PhysicalHashJoin.java     |  8 ++++
 .../plans/physical/PhysicalNestedLoopJoin.java     |  8 ++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index aa8f4d6cc7c..2aca6017b7b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.DistributionSpec;
 import org.apache.doris.nereids.properties.DistributionSpecGather;
 import org.apache.doris.nereids.properties.DistributionSpecHash;
 import org.apache.doris.nereids.properties.DistributionSpecReplicated;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows;
 import 
org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan;
@@ -311,17 +312,53 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
             }
             // TODO: since the outputs rows may expand a lot, penalty on it 
will cause bc never be chosen.
             // will refine this in next generation cost model.
+            if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+                // forbid broadcast join when stats is unknown
+                return CostV1.of(rightRowCount * buildSideFactor + 1 / 
leftRowCount,
+                        rightRowCount,
+                        0
+                );
+            }
             return CostV1.of(leftRowCount + rightRowCount * buildSideFactor + 
outputRowCount * probeSideFactor,
                     rightRowCount,
                     0
             );
         }
+        if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+            return CostV1.of(rightRowCount + 1 / leftRowCount,
+                    rightRowCount,
+                    0);
+        }
         return CostV1.of(leftRowCount + rightRowCount + outputRowCount,
                 rightRowCount,
                 0
         );
     }
 
+    private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends 
Plan> join,
+            Statistics build, Statistics probe) {
+        for (Slot slot : join.getConditionSlot()) {
+            if ((build.columnStatistics().containsKey(slot) && 
!build.columnStatistics().get(slot).isUnKnown)
+                    || (probe.columnStatistics().containsKey(slot) && 
!probe.columnStatistics().get(slot).isUnKnown)) {
+                continue;
+            }
+            return true;
+        }
+        return false;
+    }
+
+    private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ? 
extends Plan> join,
+            Statistics build, Statistics probe) {
+        for (Slot slot : join.getConditionSlot()) {
+            if ((build.columnStatistics().containsKey(slot) && 
!build.columnStatistics().get(slot).isUnKnown)
+                    || (probe.columnStatistics().containsKey(slot) && 
!probe.columnStatistics().get(slot).isUnKnown)) {
+                continue;
+            }
+            return true;
+        }
+        return false;
+    }
+
     @Override
     public Cost visitPhysicalNestedLoopJoin(
             PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> 
nestedLoopJoin,
@@ -330,7 +367,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
         Preconditions.checkState(context.arity() == 2);
         Statistics leftStatistics = context.getChildStatistics(0);
         Statistics rightStatistics = context.getChildStatistics(1);
-
+        if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) {
+            return CostV1.of(rightStatistics.getRowCount() + 1 / 
leftStatistics.getRowCount(),
+                    rightStatistics.getRowCount(),
+                    0);
+        }
         return CostV1.of(
                 leftStatistics.getRowCount() * rightStatistics.getRowCount(),
                 rightStatistics.getRowCount(),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index ef4575e3308..0498d68d793 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -147,6 +147,21 @@ public class JoinEstimation {
     }
 
     private static Statistics estimateNestLoopJoin(Statistics leftStats, 
Statistics rightStats, Join join) {
+        if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, 
join)) {
+            double rowCount = (leftStats.getRowCount() + 
rightStats.getRowCount());
+            // We do more like the nested loop join with one rows than inner 
join
+            if (leftStats.getRowCount() == 1 || rightStats.getRowCount() == 1) 
{
+                rowCount *= 0.99;
+            } else {
+                rowCount *= 1.01;
+            }
+            rowCount = Math.max(1, rowCount);
+            return new StatisticsBuilder()
+                    .setRowCount(rowCount)
+                    .putColumnStatistics(leftStats.columnStatistics())
+                    .putColumnStatistics(rightStats.columnStatistics())
+                    .build();
+        }
         return new StatisticsBuilder()
                 .setRowCount(Math.max(1, leftStats.getRowCount() * 
rightStats.getRowCount()))
                 .putColumnStatistics(leftStats.columnStatistics())
@@ -156,7 +171,7 @@ public class JoinEstimation {
 
     private static Statistics estimateInnerJoin(Statistics leftStats, 
Statistics rightStats, Join join) {
         if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, 
join)) {
-            double rowCount = Math.max(leftStats.getRowCount(), 
rightStats.getRowCount());
+            double rowCount = leftStats.getRowCount() + 
rightStats.getRowCount();
             rowCount = Math.max(1, rowCount);
             return new StatisticsBuilder()
                     .setRowCount(rowCount)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
index 84dc09b2a37..357667f748c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
@@ -24,6 +24,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties;
 import org.apache.doris.nereids.trees.expressions.ExprId;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.AbstractPlan;
 import org.apache.doris.nereids.trees.plans.JoinHint;
 import org.apache.doris.nereids.trees.plans.JoinType;
@@ -34,6 +35,7 @@ import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.statistics.Statistics;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 
 import java.util.Comparator;
@@ -41,6 +43,7 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Physical hash join plan.
@@ -215,6 +218,11 @@ public class PhysicalHashJoin<
         }
     }
 
+    public Set<Slot> getConditionSlot() {
+        return Stream.concat(hashJoinConjuncts.stream(), 
otherJoinConjuncts.stream())
+                .flatMap(expr -> 
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+    }
+
     @Override
     public String shapeInfo() {
         StringBuilder builder = new StringBuilder();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
index 3b92830b6a0..76b4859fb13 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties;
 import org.apache.doris.nereids.properties.PhysicalProperties;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.JoinHint;
 import org.apache.doris.nereids.trees.plans.JoinType;
 import org.apache.doris.nereids.trees.plans.Plan;
@@ -31,11 +32,13 @@ import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.statistics.Statistics;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Sets;
 
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Stream;
 
 /**
  * Use nested loop algorithm to do join.
@@ -169,6 +172,11 @@ public class PhysicalNestedLoopJoin<
         return bitMapRuntimeFilterConditions.isEmpty();
     }
 
+    public Set<Slot> getConditionSlot() {
+        return Stream.concat(hashJoinConjuncts.stream(), 
otherJoinConjuncts.stream())
+                .flatMap(expr -> 
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+    }
+
     @Override
     public String shapeInfo() {
         StringBuilder builder = new StringBuilder("NestedLoopJoin");


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to