englefly commented on code in PR #13375: URL: https://github.com/apache/doris/pull/13375#discussion_r1003977378
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -124,11 +147,56 @@ private static JoinEstimationResult estimateInnerJoin(PhysicalHashJoin join, Equ
         return result;
     }

+    /**
+     * estimate join
+     */
+    public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {
+        JoinType joinType = join.getJoinType();
+        double rowCount = Double.MAX_VALUE;
+        if (joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.LEFT_ANTI_JOIN) {
+            rowCount = leftStats.getRowCount();
+        } else if (joinType == JoinType.RIGHT_SEMI_JOIN || joinType == JoinType.RIGHT_ANTI_JOIN) {
+            rowCount = rightStats.getRowCount();
+        } else if (joinType == JoinType.INNER_JOIN) {
+            if (join.getHashJoinConjuncts().isEmpty()) {
+                //TODO: consider other join conjuncts
+                rowCount = leftStats.getRowCount() * rightStats.getRowCount();
+            } else {
+                for (Expression joinConjunct : join.getHashJoinConjuncts()) {
+                    double tmpRowCount = estimateInnerJoin2(join,
+                            (EqualTo) joinConjunct, leftStats, rightStats);
+                    rowCount = Math.min(rowCount, tmpRowCount);
+                }
+            }
+        } else if (joinType == JoinType.LEFT_OUTER_JOIN) {
+            rowCount = leftStats.getRowCount();
+        } else if (joinType == JoinType.RIGHT_OUTER_JOIN) {
+            rowCount = rightStats.getRowCount();
+        } else if (joinType == JoinType.CROSS_JOIN) {
+            rowCount = CheckedMath.checkedMultiply(leftStats.getRowCount(),
+                    rightStats.getRowCount());
+        } else {
+            throw new RuntimeException("joinType is not supported");
+        }
+
+        StatsDeriveResult statsDeriveResult = new StatsDeriveResult(rowCount, Maps.newHashMap());
+        if (joinType.isRemainLeftJoin()) {
+            statsDeriveResult.merge(leftStats);
+        }
+        if (joinType.isRemainRightJoin()) {
+            statsDeriveResult.merge(rightStats);
+        }
+        statsDeriveResult.setRowCount(rowCount);
+        statsDeriveResult.setWidth(rightStats.getWidth() + leftStats.getWidth());
+        statsDeriveResult.setPenalty(0.0);
+        return statsDeriveResult;
+    }
+
     /**
      * Do estimate.
      * // TODO: since we have no column stats here. just use a fix ratio to compute the row count.
      */
-    public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {
+    public static StatsDeriveResult estimate2(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) {

Review Comment:
   done

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org