This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 90ffe9068be [feat](nereids) adjust min/max for partition key #41729 branch-3.0 (#45110)
90ffe9068be is described below

commit 90ffe9068be9deb74082514ecb6753bea0707ab8
Author: minghong <zhoumingh...@selectdb.com>
AuthorDate: Mon Dec 9 10:16:37 2024 +0800

    [feat](nereids) adjust min/max for partition key #41729 branch-3.0 (#45110)

    ### What problem does this PR solve?
    pick 41729
---
 .../doris/nereids/stats/StatsCalculator.java | 131 +++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 2c4fd340bf5..0249a94d67b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,11 +18,18 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
 import org.apache.doris.catalog.MTMV;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionKey;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.RangePartitionItem;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
@@ -143,11 +150,13 @@ import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
 import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -544,6 +553,9 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
             });
             for (SlotReference slot : visibleOutputSlots) {
                 ColumnStatistic cache = getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
+                if (slot.getColumn().isPresent()) {
+                    cache = updateMinMaxForPartitionKey(olapTable, selectedPartitionNames, slot, cache);
+                }
                 ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache,
                         selectedPartitionsRowCount);
                 colStatsBuilder.normalizeAvgSizeByte(slot);
@@ -576,6 +588,125 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
         return builder.build();
     }
 
+    private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        if (olapTable.getPartitionType() == PartitionType.LIST) {
+            cache = updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache);
+        } else if (olapTable.getPartitionType() == PartitionType.RANGE) {
+            cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache);
+        }
+        return cache;
+    }
+
+    private double convertLegacyLiteralToDouble(LiteralExpr literal) throws AnalysisException {
+        return StatisticsUtil.convertToDouble(literal.getType(), literal.getStringValue());
+    }
+
+    private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        if (partitionColumnIdx != -1) {
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof ListPartitionItem) {
+                        ListPartitionItem lp = (ListPartitionItem) item;
+                        for (PartitionKey key : lp.getItems()) {
+                            if (minExpr == null) {
+                                minExpr = key.getKeys().get(partitionColumnIdx);
+                                minValue = convertLegacyLiteralToDouble(minExpr);
+                                maxExpr = key.getKeys().get(partitionColumnIdx);
+                                maxValue = convertLegacyLiteralToDouble(maxExpr);
+                            } else {
+                                double current = convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
+                                if (current > maxValue) {
+                                    maxValue = current;
+                                    maxExpr = key.getKeys().get(partitionColumnIdx);
+                                } else if (current < minValue) {
+                                    minValue = current;
+                                    minExpr = key.getKeys().get(partitionColumnIdx);
+                                }
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = new ColumnStatisticBuilder(cache)
+                            .setMinExpr(minExpr)
+                            .setMinValue(minValue)
+                            .setMaxExpr(maxExpr)
+                            .setMaxValue(maxValue)
+                            .build();
+                }
+            } catch (AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
+    private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        // for multi partition keys, only the first partition key need to adjust min/max
+        if (partitionColumnIdx == 0) {
+            // update partition column min/max by partition info
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof RangePartitionItem) {
+                        RangePartitionItem ri = (RangePartitionItem) item;
+                        Range<PartitionKey> range = ri.getItems();
+                        PartitionKey upper = range.upperEndpoint();
+                        PartitionKey lower = range.lowerEndpoint();
+                        if (maxExpr == null) {
+                            maxExpr = upper.getKeys().get(partitionColumnIdx);
+                            maxValue = convertLegacyLiteralToDouble(maxExpr);
+                            minExpr = lower.getKeys().get(partitionColumnIdx);
+                            minValue = convertLegacyLiteralToDouble(minExpr);
+                        } else {
+                            double currentValue = convertLegacyLiteralToDouble(upper.getKeys()
+                                    .get(partitionColumnIdx));
+                            if (currentValue > maxValue) {
+                                maxValue = currentValue;
+                                maxExpr = upper.getKeys().get(partitionColumnIdx);
+                            }
+                            currentValue = convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
+                            if (currentValue < minValue) {
+                                minValue = currentValue;
+                                minExpr = lower.getKeys().get(partitionColumnIdx);
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = new ColumnStatisticBuilder(cache)
+                            .setMinExpr(minExpr)
+                            .setMinValue(minValue)
+                            .setMaxExpr(maxExpr)
+                            .setMaxValue(maxValue)
+                            .build();
+                }
+            } catch (AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
     @Override
     public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) {
         return computeOlapScan(olapScan);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org