englefly commented on code in PR #18129: URL: https://github.com/apache/doris/pull/18129#discussion_r1151379059
########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java: ########## @@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics crossJoinStats, List<Expr for (int i = 0; i < sortedJoinConditions.size(); i++) { sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i)); } - return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel); + Statistics innerJoinStats = crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel); + + if (!join.getOtherJoinConjuncts().isEmpty()) { Review Comment: Yes. OtherJoinCondition should be accounted for separately. JoinCondition and OtherJoinCondition should not interfere with each other. ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java: ########## @@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics crossJoinStats, List<Expr for (int i = 0; i < sortedJoinConditions.size(); i++) { sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i)); } - return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel); + Statistics innerJoinStats = crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel); + + if (!join.getOtherJoinConjuncts().isEmpty()) { + FilterEstimation filterEstimation = new FilterEstimation(); + innerJoinStats = filterEstimation.estimate( + ExpressionUtils.and(join.getOtherJoinConjuncts()), innerJoinStats); + } + return innerJoinStats; } private static double estimateJoinConditionSel(Statistics crossJoinStats, Expression joinCond) { Statistics statistics = new FilterEstimation().estimate(joinCond, crossJoinStats); return statistics.getRowCount() / crossJoinStats.getRowCount(); } + private static double adjustSemiOrAntiByOtherJoinConditions(Join join) { + final double non_equal_ratio = 0.5; + int otherConditionCount = join.getOtherJoinConjuncts().size(); + double sel = 1.0; + for (int i = 0; i < otherConditionCount; i++) { + sel *= 
Math.pow(non_equal_ratio, 1 / Math.pow(2, i)); + } + return sel; + } + + private static double estimateSemiOrAntiRowCountByEqual(Statistics leftStats, + Statistics rightStats, Join join, EqualTo equalTo) { + Expression eqLeft = equalTo.left(); + Expression eqRight = equalTo.right(); + ColumnStatistic probColStats = leftStats.findColumnStatistics(eqLeft); + ColumnStatistic buildColStats; + if (probColStats == null) { + probColStats = leftStats.findColumnStatistics(eqRight); + buildColStats = rightStats.findColumnStatistics(eqLeft); + } else { + buildColStats = rightStats.findColumnStatistics(eqRight); + } + if (probColStats == null || buildColStats == null) { + return Double.POSITIVE_INFINITY; + } + + double rowCount; + if (join.getJoinType().isLeftSemiOrAntiJoin()) { + rowCount = leftStats.getRowCount() * buildColStats.ndv / buildColStats.originalNdv; + } else { + //right semi or anti + rowCount = rightStats.getRowCount() * probColStats.ndv / probColStats.originalNdv; + } + return rowCount; + } + + private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics rightStats, Join join) { + // primaryConjunct is the most effective conjunct. + double rowCount = Double.POSITIVE_INFINITY; + for (Expression conjunct : join.getHashJoinConjuncts()) { + double eqRowCount = estimateSemiOrAntiRowCountByEqual(leftStats, rightStats, join, (EqualTo) conjunct); + if (rowCount > eqRowCount) { + rowCount = eqRowCount; + } + } + if (rowCount == Double.POSITIVE_INFINITY) { + //fall back to original alg. 
+ return null; + } + rowCount = rowCount * adjustSemiOrAntiByOtherJoinConditions(join); + + StatisticsBuilder builder; + if (join.getJoinType().isLeftSemiOrAntiJoin()) { + leftStats.fix(rowCount, leftStats.getRowCount()); + builder = new StatisticsBuilder(leftStats); + builder.setRowCount(rowCount); + } else { + //right semi or anti + rightStats.fix(rowCount, rightStats.getRowCount()); + builder = new StatisticsBuilder(rightStats); + builder.setRowCount(rowCount); + } + return builder.build(); + + } + /** * estimate join */ public static Statistics estimate(Statistics leftStats, Statistics rightStats, Join join) { JoinType joinType = join.getJoinType(); Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org