englefly commented on code in PR #18129:
URL: https://github.com/apache/doris/pull/18129#discussion_r1151379059


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics 
crossJoinStats, List<Expr
         for (int i = 0; i < sortedJoinConditions.size(); i++) {
             sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / 
Math.pow(2, i));
         }
-        return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() 
* sel);
+        Statistics innerJoinStats = 
crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+
+        if (!join.getOtherJoinConjuncts().isEmpty()) {

Review Comment:
   Yes. OtherJoinCondition should be accounted for separately.
   JoinCondition and OtherJoinCondition should not interfere with each other.
   



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics 
crossJoinStats, List<Expr
         for (int i = 0; i < sortedJoinConditions.size(); i++) {
             sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / 
Math.pow(2, i));
         }
-        return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() 
* sel);
+        Statistics innerJoinStats = 
crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+
+        if (!join.getOtherJoinConjuncts().isEmpty()) {
+            FilterEstimation filterEstimation = new FilterEstimation();
+            innerJoinStats = filterEstimation.estimate(
+                    ExpressionUtils.and(join.getOtherJoinConjuncts()), 
innerJoinStats);
+        }
+        return innerJoinStats;
     }
 
     private static double estimateJoinConditionSel(Statistics crossJoinStats, 
Expression joinCond) {
         Statistics statistics = new FilterEstimation().estimate(joinCond, 
crossJoinStats);
         return statistics.getRowCount() / crossJoinStats.getRowCount();
     }
 
+    private static double adjustSemiOrAntiByOtherJoinConditions(Join join) {
+        final double non_equal_ratio = 0.5;
+        int otherConditionCount = join.getOtherJoinConjuncts().size();
+        double sel = 1.0;
+        for (int i = 0; i < otherConditionCount; i++) {
+            sel *= Math.pow(non_equal_ratio, 1 / Math.pow(2, i));
+        }
+        return sel;
+    }
+
+    private static double estimateSemiOrAntiRowCountByEqual(Statistics 
leftStats,
+            Statistics rightStats, Join join, EqualTo equalTo) {
+        Expression eqLeft = equalTo.left();
+        Expression eqRight = equalTo.right();
+        ColumnStatistic probColStats = leftStats.findColumnStatistics(eqLeft);
+        ColumnStatistic buildColStats;
+        if (probColStats == null) {
+            probColStats = leftStats.findColumnStatistics(eqRight);
+            buildColStats = rightStats.findColumnStatistics(eqLeft);
+        } else {
+            buildColStats = rightStats.findColumnStatistics(eqRight);
+        }
+        if (probColStats == null || buildColStats == null) {
+            return Double.POSITIVE_INFINITY;
+        }
+
+        double rowCount;
+        if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+            rowCount = leftStats.getRowCount() * buildColStats.ndv / 
buildColStats.originalNdv;
+        } else {
+            //right semi or anti
+            rowCount = rightStats.getRowCount() * probColStats.ndv / 
probColStats.originalNdv;
+        }
+        return rowCount;
+    }
+
+    private static Statistics estimateSemiOrAnti(Statistics leftStats, 
Statistics rightStats, Join join) {
+        // primaryConjunct is the most effective conjunct.
+        double rowCount = Double.POSITIVE_INFINITY;
+        for (Expression conjunct : join.getHashJoinConjuncts()) {
+            double eqRowCount = estimateSemiOrAntiRowCountByEqual(leftStats, 
rightStats, join, (EqualTo) conjunct);
+            if (rowCount > eqRowCount) {
+                rowCount = eqRowCount;
+            }
+        }
+        if (rowCount == Double.POSITIVE_INFINITY) {
+            //fall back to original alg.
+            return null;
+        }
+        rowCount = rowCount * adjustSemiOrAntiByOtherJoinConditions(join);
+
+        StatisticsBuilder builder;
+        if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+            leftStats.fix(rowCount, leftStats.getRowCount());
+            builder = new StatisticsBuilder(leftStats);
+            builder.setRowCount(rowCount);
+        } else {
+            //right semi or anti
+            rightStats.fix(rowCount, rightStats.getRowCount());
+            builder = new StatisticsBuilder(rightStats);
+            builder.setRowCount(rowCount);
+        }
+        return builder.build();
+
+    }
+
     /**
      * estimate join
      */
     public static Statistics estimate(Statistics leftStats, Statistics 
rightStats, Join join) {
         JoinType joinType = join.getJoinType();

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to