englefly commented on code in PR #12987: URL: https://github.com/apache/doris/pull/12987#discussion_r990604144
########## fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java: ########## @@ -146,4 +147,22 @@ public StatsDeriveResult merge(StatsDeriveResult other) { public StatsDeriveResult copy() { return new StatsDeriveResult(this); } + + public StatsDeriveResult updateRowCountOnCopy(double selectivity) { + StatsDeriveResult copy = new StatsDeriveResult(this); + copy.setRowCount(rowCount * selectivity); + for (Entry<Slot, ColumnStat> entry : copy.slotToColumnStats.entrySet()) { + entry.getValue().updateBySelectivity(selectivity, rowCount); Review Comment: This logic is not sound. Consider `select A, B from T where B = 1`: if the selectivity of the filter is 0.1, there is no reason to assume that the NDV of column A is also reduced by a factor of 10. ########## fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java: ########## @@ -214,12 +187,126 @@ private PartitionStats getNotNullPartitionStats(String partitionName) { * @param columnName column name * @return @ColumnStats */ - private ColumnStats getNotNullColumnStats(String columnName) { - ColumnStats columnStats = nameToColumnStats.get(columnName); - if (columnStats == null) { - columnStats = new ColumnStats(); - nameToColumnStats.put(columnName, columnStats); + private ColumnStat getNotNullColumnStats(String columnName) { + ColumnStat columnStat = nameToColumnStats.get(columnName); + if (columnStat == null) { + columnStat = new ColumnStat(); + nameToColumnStats.put(columnName, columnStat); } - return columnStats; + return columnStat; + } + + public ColumnStat getColumnStats(String columnName) { + ColumnStat columnStat = nameToColumnStats.get(columnName); + if (columnStat == null) { + columnStat = new ColumnStat(); + nameToColumnStats.put(columnName, columnStat); + } + return columnStat; + } + + public ColumnStat getColumnStatCopy(String columnName) { + ColumnStat columnStat = getColumnStats(columnName); + return columnStat.copy(); + } + + public List<String> getShowInfo() { + List<String> result = Lists.newArrayList(); + 
result.add(Double.toString(getRowCount())); + result.add(Long.toString(getDataSize())); + return result; + } + + public List<String> getShowInfo(String partitionName) { + PartitionStats partitionStats = nameToPartitionStats.get(partitionName); + return partitionStats.getShowInfo(); + } + + private Map<String, ColumnStat> getAggPartitionColStats() { + Map<String, ColumnStat> aggColumnStats = new HashMap<>(); + for (PartitionStats partitionStats : nameToPartitionStats.values()) { + partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> { + if (!aggColumnStats.containsKey(colName)) { + aggColumnStats.put(colName, columnStats); + } else { + ColumnStat tblColStats = aggColumnStats.get(colName); + mergePartitionColumnStats(tblColStats, columnStats); + } + }); + } + + return aggColumnStats; + } + + private void mergePartitionColumnStats(ColumnStat leftStats, ColumnStat rightStats) { + if (leftStats.getNdv() == -1) { + if (rightStats.getNdv() != -1) { + leftStats.setNdv(rightStats.getNdv()); + } + } else { + if (rightStats.getNdv() != -1) { + double ndv = leftStats.getNdv() + rightStats.getNdv(); + leftStats.setNdv(ndv); + } + } + + if (leftStats.getAvgSizeByte() == -1) { + if (rightStats.getAvgSizeByte() != -1) { + leftStats.setAvgSizeByte(rightStats.getAvgSizeByte()); + } + } else { + if (rightStats.getAvgSizeByte() != -1) { + double avgSize = (leftStats.getAvgSizeByte() + rightStats.getAvgSizeByte()) / 2; + leftStats.setAvgSizeByte(avgSize); + } + } + + if (leftStats.getMaxSizeByte() == -1) { + if (rightStats.getMaxSizeByte() != -1) { + leftStats.setMaxSizeByte(rightStats.getMaxSizeByte()); + } + } else { + if (rightStats.getMaxSizeByte() != -1) { + double maxSize = Math.max(leftStats.getMaxSizeByte(), rightStats.getMaxSizeByte()); + leftStats.setMaxSizeByte(maxSize); + } + } + + if (leftStats.getNumNulls() == -1) { + if (rightStats.getNumNulls() != -1) { + leftStats.setNumNulls(rightStats.getNumNulls()); + } + } else { + if (rightStats.getNumNulls() != 
-1) { + double numNulls = leftStats.getNumNulls() + rightStats.getNumNulls(); + leftStats.setNumNulls(numNulls); + } + } + + if (Double.isNaN(leftStats.getMinValue())) { + if (!Double.isNaN(rightStats.getMinValue())) { + leftStats.setMinValue(rightStats.getMinValue()); + } + } else if (!Double.isNaN(rightStats.getMinValue())) { + double minValue = Math.max(leftStats.getMinValue(), rightStats.getMinValue()); + leftStats.setMinValue(minValue); + } + + + if (Double.isNaN(leftStats.getMaxValue())) { + if (!Double.isNaN(rightStats.getMaxValue())) { + leftStats.setMaxValue(rightStats.getMaxValue()); + } + } else if (!Double.isNaN(rightStats.getMaxValue())) { + double maxValue = Math.min(leftStats.getMaxValue(), rightStats.getMaxValue()); Review Comment: Could you explain why `maxValue` is computed with `Math.min(...)`? When merging the statistics of two partitions, the merged maximum should be the larger of the two values. ########## fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java: ########## @@ -38,6 +40,16 @@ public static long checkedMultiply(long a, long b) { } } + public static double checkedMultiply(double a, double b) { + BigDecimal d1 = new BigDecimal(a); + BigDecimal d2 = new BigDecimal(b); + BigDecimal result = d1.multiply(d2); + if (result.compareTo(new BigDecimal(Double.MAX_VALUE)) > 0) { Review Comment: Add a `static final` constant `MAX_BIG_DECIMAL` and reuse it here, instead of creating a `new BigDecimal(Double.MAX_VALUE)` on every call. ########## fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java: ########## @@ -38,6 +40,16 @@ public static long checkedMultiply(long a, long b) { } } + public static double checkedMultiply(double a, double b) { + BigDecimal d1 = new BigDecimal(a); + BigDecimal d2 = new BigDecimal(b); + BigDecimal result = d1.multiply(d2); + if (result.compareTo(new BigDecimal(Double.MAX_VALUE)) > 0) { Review Comment: Why do we need to convert the `double` operands to `BigDecimal`? 
########## fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java: ########## @@ -214,12 +187,126 @@ private PartitionStats getNotNullPartitionStats(String partitionName) { * @param columnName column name * @return @ColumnStats */ - private ColumnStats getNotNullColumnStats(String columnName) { - ColumnStats columnStats = nameToColumnStats.get(columnName); - if (columnStats == null) { - columnStats = new ColumnStats(); - nameToColumnStats.put(columnName, columnStats); + private ColumnStat getNotNullColumnStats(String columnName) { + ColumnStat columnStat = nameToColumnStats.get(columnName); + if (columnStat == null) { + columnStat = new ColumnStat(); + nameToColumnStats.put(columnName, columnStat); } - return columnStats; + return columnStat; + } + + public ColumnStat getColumnStats(String columnName) { Review Comment: `stat` or `stats`? Please pick one and use it consistently; do not mix both in the same class. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org