englefly commented on code in PR #29184: URL: https://github.com/apache/doris/pull/29184#discussion_r1438140973
########## fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java: ########## @@ -568,10 +581,33 @@ public Statistics visitLike(Like like, EstimationContext context) { "col stats not found. slot=%s in %s", like.left().toSql(), like.toSql()); ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(origin); - colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY).setNumNulls(0); + double selectivity = origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY; + double notNullSel = getNotNullSelectivity(origin, selectivity); + colBuilder.setNdv(selectivity).setCount(notNullSel * context.statistics.getRowCount()).setNumNulls(0); statsBuilder.putColumnStatistics(like.left(), colBuilder.build()); context.addKeyIfSlot(like.left()); } return statsBuilder.build(); } + + private double getNotNullSelectivity(ColumnStatistic stats, double origSel) { + double rowCount = stats.count; + double numNulls = stats.numNulls; + + // comment following check since current rowCount and ndv may be inconsistant + // e.g, rowCount has been reduced by one filter but another filter column's + // ndv and numNull remains originally, which will unexpectedly go into the following + // normalization. + + //if (numNulls > rowCount - ndv) { + // numNulls = rowCount - ndv > 0 ? rowCount - ndv : 0; + //} + double notNullSel = rowCount <= 1.0 ? 1.0 : 1 - getValidSelectivity(numNulls / rowCount); Review Comment: If rowCount=0, notNullSel is NaN, and this NaN pollutes the following derivation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org