morrySnow commented on code in PR #13375: URL: https://github.com/apache/doris/pull/13375#discussion_r1001644423
########## fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java: ########## @@ -47,6 +48,11 @@ * Inspired by Presto. */ public class CostCalculator { + static final double cpuWeight = 1; Review Comment: ```suggestion static final double CPU_WEIGHT = 1; ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java: ########## @@ -39,6 +40,8 @@ * Representation for group expression in cascades optimizer. */ public class GroupExpression { + private double cost = 0.0; Review Comment: is this lowest cost for all PhysicalProperties? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/properties/PhysicalProperties.java: ########## @@ -96,4 +96,10 @@ public int hashCode() { } return hashCode; } + + @Override + public String toString() { + return distributionSpec.toString() + " " + orderSpec.toString(); Review Comment: it is better has the same format with plan toString ########## fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java: ########## @@ -39,6 +40,8 @@ * Representation for group expression in cascades optimizer. */ public class GroupExpression { + private double cost = 0.0; + private CostEstimate costEstimate = null; Review Comment: what's this use for? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java: ########## @@ -69,6 +69,29 @@ private static class JoinEstimationResult { public double rowCount = 0; } + private static double estimateInnerJoin2(Join join, EqualTo equalto, Review Comment: ```suggestion private static double estimateInnerJoinV2(Join join, EqualTo equalto, ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java: ########## @@ -47,6 +48,11 @@ * Inspired by Presto. */ public class CostCalculator { + static final double cpuWeight = 1; + static final double memorWeight = 1; + static final double networkWeight = 1.5; + static final double penaltyWeight = 0.5; + static final double heavyOperatorPunishFactor = 6.0; Review Comment: plz add some comment to explain these two factors ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculatorV2.java: ########## @@ -99,7 +99,10 @@ public static void estimate(GroupExpression groupExpression) { private void estimate() { StatsDeriveResult stats = groupExpression.getPlan().accept(this, null); - groupExpression.getOwnerGroup().setStatistics(stats); + StatsDeriveResult originStats = groupExpression.getOwnerGroup().getStatistics(); + if (originStats == null || originStats.getRowCount() > stats.getRowCount()) { Review Comment: ditto ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java: ########## @@ -104,7 +104,10 @@ public static void estimate(GroupExpression groupExpression) { private void estimate() { StatsDeriveResult stats = groupExpression.getPlan().accept(this, null); - groupExpression.getOwnerGroup().setStatistics(stats); + if (groupExpression.getOwnerGroup().getStatistics() == null + || (stats.getRowCount() < groupExpression.getOwnerGroup().getStatistics().getRowCount())) { Review Comment: add comment to explain `stats.getRowCount() < groupExpression.getOwnerGroup().getStatistics().getRowCount())` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java: ########## @@ -82,6 +82,10 @@ public static Literal of(Object value) { public abstract Object getValue(); + public double getDouble() { Review Comment: add java doc to this function to explain what it is use for ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java: ########## @@ -115,19 +105,166 @@ public StatsDeriveResult visitCompoundPredicate(CompoundPredicate predicate, Est @Override public StatsDeriveResult visitComparisonPredicate(ComparisonPredicate cp, EstimationContext context) { + boolean isNot = (context != null) && context.isNot; Expression left = cp.left(); Expression right = cp.right(); - ColumnStat statsForLeft = ExpressionEstimation.estimate(left, stats); - ColumnStat statsForRight = ExpressionEstimation.estimate(right, stats); + ColumnStat statsForLeft = ExpressionEstimation.estimate(left, inputStats); + ColumnStat statsForRight = ExpressionEstimation.estimate(right, inputStats); double selectivity; if (!(left instanceof Literal) && !(right instanceof Literal)) { selectivity = calculateWhenBothChildIsColumn(cp, statsForLeft, statsForRight); } else { // For literal, it's max min is same value. - selectivity = calculateWhenRightChildIsLiteral(cp, statsForLeft, statsForRight.getMaxValue()); + selectivity = updateLeftStatsWhenRightChildIsLiteral(cp, + statsForLeft, + statsForRight.getMaxValue(), + isNot); + } + StatsDeriveResult outputStats = new StatsDeriveResult(inputStats); + //TODO: we take the assumption that func(A) and A have the same stats. + outputStats.updateBySelectivity(selectivity, cp.getInputSlots()); + if (left.getInputSlots().size() == 1) { + Slot leftSlot = left.getInputSlots().iterator().next(); + outputStats.updateColumnStatsForSlot(leftSlot, statsForLeft); + } + return outputStats; + } + + private double updateLessThan(ColumnStat statsForLeft, double val, Review Comment: do we have enough ut to cover these code? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java: ########## @@ -124,11 +147,56 @@ private static JoinEstimationResult estimateInnerJoin(PhysicalHashJoin join, Equ return result; } + /** + * estimate join + */ + public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) { + JoinType joinType = join.getJoinType(); + double rowCount = Double.MAX_VALUE; + if (joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.LEFT_ANTI_JOIN) { + rowCount = leftStats.getRowCount(); + } else if (joinType == JoinType.RIGHT_SEMI_JOIN || joinType == JoinType.RIGHT_ANTI_JOIN) { + rowCount = rightStats.getRowCount(); Review Comment: why semi and anti join filter non rows? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java: ########## @@ -105,7 +105,8 @@ public PreAggStatus getPreAggStatus() { public String toString() { return Utils.toSqlString("PhysicalOlapScan", "qualified", Utils.qualifiedName(qualifier, olapTable.getName()), - "output", getOutput() + "output", getOutput(), + "stats=", statsDeriveResult Review Comment: ```suggestion "stats", statsDeriveResult ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java: ########## @@ -56,12 +62,20 @@ public ColumnStat visit(Expression expr, StatsDeriveResult context) { return expr.accept(this, context); } + public ColumnStat visitCaseWhen(CaseWhen caseWhen, StatsDeriveResult context) { + throw new RuntimeException("ExpressionEstimation case-when not implemented"); + } + + public ColumnStat visitCast(Cast cast, StatsDeriveResult context) { + return cast.child().accept(this, context); + } + @Override public ColumnStat visitLiteral(Literal literal, StatsDeriveResult context) { - if (literal.isStringLiteral()) { + if (ColumnStat.MAX_MIN_UNSUPPORTED_TYPE.contains(literal.getDataType().toCatalogDataType())) { return ColumnStat.UNKNOWN; Review Comment: should we return a default value instead of unknown? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java: ########## @@ -124,11 +147,56 @@ private static JoinEstimationResult estimateInnerJoin(PhysicalHashJoin join, Equ return result; } + /** + * estimate join + */ + public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) { + JoinType joinType = join.getJoinType(); + double rowCount = Double.MAX_VALUE; + if (joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.LEFT_ANTI_JOIN) { + rowCount = leftStats.getRowCount(); + } else if (joinType == JoinType.RIGHT_SEMI_JOIN || joinType == JoinType.RIGHT_ANTI_JOIN) { + rowCount = rightStats.getRowCount(); + } else if (joinType == JoinType.INNER_JOIN) { + if (join.getHashJoinConjuncts().isEmpty()) { + //TODO: consider other join conjuncts + rowCount = leftStats.getRowCount() * rightStats.getRowCount(); + } else { + for (Expression joinConjunct : join.getHashJoinConjuncts()) { + double tmpRowCount = estimateInnerJoin2(join, + (EqualTo) joinConjunct, leftStats, rightStats); + rowCount = Math.min(rowCount, tmpRowCount); + } + } + } else if (joinType == JoinType.LEFT_OUTER_JOIN) { + rowCount = leftStats.getRowCount(); + } else if (joinType == JoinType.RIGHT_OUTER_JOIN) { + rowCount = rightStats.getRowCount(); + } else if (joinType == JoinType.CROSS_JOIN) { + rowCount = CheckedMath.checkedMultiply(leftStats.getRowCount(), + rightStats.getRowCount()); + } else { + throw new RuntimeException("joinType is not supported"); + } + + StatsDeriveResult statsDeriveResult = new StatsDeriveResult(rowCount, Maps.newHashMap()); + if (joinType.isRemainLeftJoin()) { + statsDeriveResult.merge(leftStats); + } + if (joinType.isRemainRightJoin()) { + statsDeriveResult.merge(rightStats); + } + statsDeriveResult.setRowCount(rowCount); + statsDeriveResult.setWidth(rightStats.getWidth() + leftStats.getWidth()); + statsDeriveResult.setPenalty(0.0); + return statsDeriveResult; + } + /** * Do estimate. * // TODO: since we have no column stats here. just use a fix ratio to compute the row count. */ - public static StatsDeriveResult estimate(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) { + public static StatsDeriveResult estimate2(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) { Review Comment: ```suggestion public static StatsDeriveResult estimateV2(StatsDeriveResult leftStats, StatsDeriveResult rightStats, Join join) { ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java: ########## @@ -56,12 +62,20 @@ public ColumnStat visit(Expression expr, StatsDeriveResult context) { return expr.accept(this, context); } + public ColumnStat visitCaseWhen(CaseWhen caseWhen, StatsDeriveResult context) { + throw new RuntimeException("ExpressionEstimation case-when not implemented"); Review Comment: remove exception, use a default selectivity and add a todo -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org