englefly commented on code in PR #18129:
URL: https://github.com/apache/doris/pull/18129#discussion_r1150433024


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics crossJoinStats, List<Expr
         for (int i = 0; i < sortedJoinConditions.size(); i++) {
             sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i));
         }
-        return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+        Statistics innerJoinStats = crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+
+        if (!join.getOtherJoinConjuncts().isEmpty()) {
+            FilterEstimation filterEstimation = new FilterEstimation();
+            innerJoinStats = filterEstimation.estimate(
+                    ExpressionUtils.and(join.getOtherJoinConjuncts()), innerJoinStats);
+        }
+        return innerJoinStats;
     }
 
     private static double estimateJoinConditionSel(Statistics crossJoinStats, Expression joinCond) {
         Statistics statistics = new FilterEstimation().estimate(joinCond, crossJoinStats);
         return statistics.getRowCount() / crossJoinStats.getRowCount();
     }
 
+    private static double adjustSemiOrAntiByOtherJoinConditions(Join join) {
+        final double non_equal_ratio = 0.5;
+        int otherConditionCount = join.getOtherJoinConjuncts().size();
+        double sel = 1.0;
+        for (int i = 0; i < otherConditionCount; i++) {
+            sel *= Math.pow(non_equal_ratio, 1 / Math.pow(2, i));
+        }
+        return sel;
+    }
+
+    private static double estimateSemiOrAntiRowCountByEqual(Statistics leftStats,
+            Statistics rightStats, Join join, EqualTo equalTo) {
+        Expression eqLeft = equalTo.left();
+        Expression eqRight = equalTo.right();
+        ColumnStatistic probColStats = leftStats.findColumnStatistics(eqLeft);
+        ColumnStatistic buildColStats;
+        if (probColStats == null) {
+            probColStats = leftStats.findColumnStatistics(eqRight);
+            buildColStats = rightStats.findColumnStatistics(eqLeft);
+        } else {
+            buildColStats = rightStats.findColumnStatistics(eqRight);
+        }
+        if (probColStats == null || buildColStats == null) {
+            return Double.POSITIVE_INFINITY;
+        }
+
+        double rowCount;
+        if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+            rowCount = leftStats.getRowCount() * buildColStats.ndv / buildColStats.originalNdv;
+        } else {
+            //right semi or anti
+            rowCount = rightStats.getRowCount() * probColStats.ndv / probColStats.originalNdv;
+        }
+        return rowCount;
+    }
+
+    private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics rightStats, Join join) {
+        // primaryConjunct is the most effective conjunct.

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to