This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 7d9a7512971 [feat](nereids) adjust min/max for partition key #41729 
branch-2.1 (#45713)
7d9a7512971 is described below

commit 7d9a7512971b3ec9dc0dee68835999b123c8650a
Author: minghong <zhoumingh...@selectdb.com>
AuthorDate: Sun Dec 22 11:20:01 2024 +0800

    [feat](nereids) adjust min/max for partition key #41729 branch-2.1 (#45713)
    
    ### What problem does this PR solve?
    pick #41729
---
 .../doris/nereids/stats/StatsCalculator.java       | 174 +++++++++++++++++++++
 1 file changed, 174 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index d4b4022fbe7..bfe5ec4be48 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,9 +18,15 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.PartitionKey;
+import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.RangePartitionItem;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
@@ -136,11 +142,13 @@ import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
 import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Range;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -899,6 +907,155 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         }
     }
 
+    /**
+     * Determine whether it is a partition key inside the function.
+     */
+    private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        if (olapTable.getPartitionType() == PartitionType.LIST) {
+            cache = updateMinMaxForListPartitionKey(olapTable, 
selectedPartitionNames, slot, cache);
+        } else if (olapTable.getPartitionType() == PartitionType.RANGE) {
+            cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, 
selectedPartitionNames, slot, cache);
+        }
+        return cache;
+    }
+
+    private double convertLegacyLiteralToDouble(LiteralExpr literal) throws 
org.apache.doris.common.AnalysisException {
+        return StatisticsUtil.convertToDouble(literal.getType(), 
literal.getStringValue());
+    }
+
+    private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable 
olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = 
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        if (partitionColumnIdx != -1) {
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = 
olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof ListPartitionItem) {
+                        ListPartitionItem lp = (ListPartitionItem) item;
+                        for (PartitionKey key : lp.getItems()) {
+                            if (minExpr == null) {
+                                minExpr = 
key.getKeys().get(partitionColumnIdx);
+                                minValue = 
convertLegacyLiteralToDouble(minExpr);
+                                maxExpr = 
key.getKeys().get(partitionColumnIdx);
+                                maxValue = 
convertLegacyLiteralToDouble(maxExpr);
+                            } else {
+                                double current = 
convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
+                                if (current > maxValue) {
+                                    maxValue = current;
+                                    maxExpr = 
key.getKeys().get(partitionColumnIdx);
+                                } else if (current < minValue) {
+                                    minValue = current;
+                                    minExpr = 
key.getKeys().get(partitionColumnIdx);
+                                }
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = updateMinMax(cache, minValue, minExpr, maxValue, 
maxExpr);
+                }
+            } catch (org.apache.doris.common.AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
+    private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable 
olapTable,
+            List<String> selectedPartitionNames,
+            SlotReference slot, ColumnStatistic cache) {
+        int partitionColumnIdx = 
olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
+        // for multi partition keys, only the first partition key need to 
adjust min/max
+        if (partitionColumnIdx == 0) {
+            // update partition column min/max by partition info
+            try {
+                LiteralExpr minExpr = null;
+                LiteralExpr maxExpr = null;
+                double minValue = 0;
+                double maxValue = 0;
+                for (String selectedPartitionName : selectedPartitionNames) {
+                    PartitionItem item = 
olapTable.getPartitionItemOrAnalysisException(
+                            selectedPartitionName);
+                    if (item instanceof RangePartitionItem) {
+                        RangePartitionItem ri = (RangePartitionItem) item;
+                        Range<PartitionKey> range = ri.getItems();
+                        PartitionKey upper = range.upperEndpoint();
+                        PartitionKey lower = range.lowerEndpoint();
+                        if (maxExpr == null) {
+                            maxExpr = upper.getKeys().get(partitionColumnIdx);
+                            maxValue = convertLegacyLiteralToDouble(maxExpr);
+                            minExpr = lower.getKeys().get(partitionColumnIdx);
+                            minValue = convertLegacyLiteralToDouble(minExpr);
+                        } else {
+                            double currentValue = 
convertLegacyLiteralToDouble(upper.getKeys()
+                                    .get(partitionColumnIdx));
+                            if (currentValue > maxValue) {
+                                maxValue = currentValue;
+                                maxExpr = 
upper.getKeys().get(partitionColumnIdx);
+                            }
+                            currentValue = 
convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
+                            if (currentValue < minValue) {
+                                minValue = currentValue;
+                                minExpr = 
lower.getKeys().get(partitionColumnIdx);
+                            }
+                        }
+                    }
+                }
+                if (minExpr != null) {
+                    cache = updateMinMax(cache, minValue, minExpr, maxValue, 
maxExpr);
+                }
+            } catch (org.apache.doris.common.AnalysisException e) {
+                LOG.debug(e.getMessage());
+            }
+        }
+        return cache;
+    }
+
+    private ColumnStatistic updateMinMax(ColumnStatistic cache, double 
minValue, LiteralExpr minExpr,
+            double maxValue, LiteralExpr maxExpr) {
+        boolean shouldUpdateCache = false;
+        if (!cache.isUnKnown) {
+            // merge the min/max with cache.
+            // example: min/max range in cache is [10-20]
+            // range from partition def is [15-30]
+            // the final range is [15-20]
+            if (cache.minValue > minValue) {
+                minValue = cache.minValue;
+                minExpr = cache.minExpr;
+            } else {
+                shouldUpdateCache = true;
+            }
+            if (cache.maxValue < maxValue) {
+                maxValue = cache.maxValue;
+                maxExpr = cache.maxExpr;
+            } else {
+                shouldUpdateCache = true;
+            }
+            // if min/max is invalid, do not update cache
+            if (minValue > maxValue) {
+                shouldUpdateCache = false;
+            }
+        }
+
+        if (shouldUpdateCache) {
+            cache = new ColumnStatisticBuilder(cache)
+                    .setMinExpr(minExpr)
+                    .setMinValue(minValue)
+                    .setMaxExpr(maxExpr)
+                    .setMaxValue(maxValue)
+                    .build();
+        }
+        return cache;
+    }
+
     // TODO: 1. Subtract the pruned partition
     //       2. Consider the influence of runtime filter
     //       3. Get NDV and column data size from StatisticManger, 
StatisticManager doesn't support it now.
@@ -944,6 +1101,18 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
             LOG.debug("{} is partially analyzed, clear min/max values in 
column stats",
                     catalogRelation.getTable().getName());
         }
+        List<String> selectedPartitionNames = null;
+        if (catalogRelation instanceof OlapScan) {
+            OlapScan olapScan = (OlapScan) catalogRelation;
+            if (olapScan.getSelectedPartitionIds().size() < 
olapScan.getTable().getPartitionNum()) {
+                // partition pruned
+                // try to use selected partition stats, if failed, fall back 
to table stats
+                selectedPartitionNames = new 
ArrayList<>(olapScan.getSelectedPartitionIds().size());
+                for (Long id : olapScan.getSelectedPartitionIds()) {
+                    
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
+                }
+            }
+        }
         for (SlotReference slotReference : slotSet) {
             String colName = slotReference.getColumn().isPresent()
                     ? slotReference.getColumn().get().getName()
@@ -959,6 +1128,11 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 cache = ColumnStatistic.UNKNOWN;
             } else {
                 cache = getColumnStatistic(table, colName, idxId);
+                if (table instanceof OlapTable && 
slotReference.getColumn().isPresent()
+                        && selectedPartitionNames != null) {
+                    cache = updateMinMaxForPartitionKey((OlapTable) table,
+                            selectedPartitionNames, slotReference, cache);
+                }
             }
             ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(cache);
             colStatsBuilder.normalizeAvgSizeByte(slotReference);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to