This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 1fabd3ea0c3 branch-3.1: [enhancement](Nereids) add interface for
binary search filtering prune external table's partitions #47781 (#52014)
1fabd3ea0c3 is described below
commit 1fabd3ea0c3ecd9c4d686c33f0bea096d465ff68
Author: 924060929 <[email protected]>
AuthorDate: Fri Jun 20 13:24:45 2025 +0800
branch-3.1: [enhancement](Nereids) add interface for binary search
filtering prune external table's partitions #47781 (#52014)
cherry pick from #47781
---
.../java/org/apache/doris/catalog/OlapTable.java | 19 ++++++++-
.../SupportBinarySearchFilteringPartitions.java | 46 +++++++++++++++++++++
.../cache/NereidsSortedPartitionsCacheManager.java | 47 +++++++++++++---------
.../rules/rewrite/PruneFileScanPartition.java | 16 +++++++-
.../rules/rewrite/PruneOlapScanPartition.java | 3 +-
5 files changed, 110 insertions(+), 21 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 3a2a3b1f60a..1248cbd5537 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -55,6 +55,7 @@ import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
import org.apache.doris.mtmv.MTMVVersionSnapshot;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.qe.ConnectContext;
@@ -118,9 +119,25 @@ import java.util.stream.Collectors;
* Internal representation of tableFamilyGroup-related metadata. A
OlaptableFamilyGroup contains several tableFamily.
* Note: when you add a new olap table property, you should modify
TableProperty class
*/
-public class OlapTable extends Table implements MTMVRelatedTableIf,
GsonPostProcessable {
+public class OlapTable extends Table implements MTMVRelatedTableIf,
GsonPostProcessable,
+ SupportBinarySearchFilteringPartitions {
private static final Logger LOG = LogManager.getLogger(OlapTable.class);
+ @Override
+ public Map<Long, PartitionItem> getOriginPartitions(CatalogRelation scan) {
+ return getPartitionInfo().getIdToItem(false);
+ }
+
+ @Override
+ public Object getPartitionMetaVersion(CatalogRelation scan) {
+ return getVisibleVersion();
+ }
+
+ @Override
+ public long getPartitionMetaLoadTimeMillis(CatalogRelation scan) {
+ return getVisibleVersionTime();
+ }
+
public enum OlapTableState {
NORMAL,
ROLLUP,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/SupportBinarySearchFilteringPartitions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/SupportBinarySearchFilteringPartitions.java
new file mode 100644
index 00000000000..8ba63086a64
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/SupportBinarySearchFilteringPartitions.java
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
+
+import java.util.Map;
+
+/**
+ * SupportBinarySearchFilteringPartitions: this interface is used to support
binary search filtering partitions
+ */
+public interface SupportBinarySearchFilteringPartitions extends TableIf {
+ /**
+ * get the origin partition info which maybe not sorted, the
NereidsSortedPartitionsCacheManager will
+ * sort this partitions and cache in frontend. you can save the
partition's meta snapshot id in the
+ * CatalogRelation and get the partitions by the snapshot id.
+ */
+ Map<?, PartitionItem> getOriginPartitions(CatalogRelation scan);
+
+ /**
+ * return the version of the partitions meta, if the version changed, we
should skip the legacy sorted
+ * partitions and reload it.
+ */
+ Object getPartitionMetaVersion(CatalogRelation scan);
+
+ /**
+ * when the partition meta loaded? if the partition meta load too
frequently, we will skip sort partitions meta
+ * and will not use binary search to filtering partitions
+ */
+ long getPartitionMetaLoadTimeMillis(CatalogRelation scan);
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
index 499c3b46709..72f50d5b7b3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java
@@ -19,9 +19,8 @@ package org.apache.doris.common.cache;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
import org.apache.doris.common.Config;
import org.apache.doris.common.ConfigBase.DefaultConfHandler;
import org.apache.doris.datasource.CatalogIf;
@@ -30,6 +29,7 @@ import
org.apache.doris.nereids.rules.expression.rules.PartitionItemToRange;
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndId;
import
org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndRange;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
@@ -43,6 +43,7 @@ import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Objects;
import java.util.Optional;
/**
@@ -62,7 +63,8 @@ public class NereidsSortedPartitionsCacheManager {
);
}
- public Optional<SortedPartitionRanges<?>> get(OlapTable table) {
+ public Optional<SortedPartitionRanges<?>> get(
+ SupportBinarySearchFilteringPartitions table, CatalogRelation
scan) {
DatabaseIf<?> database = table.getDatabase();
if (database == null) {
return Optional.empty();
@@ -75,25 +77,33 @@ public class NereidsSortedPartitionsCacheManager {
catalog.getName(), database.getFullName(), table.getName());
PartitionCacheContext partitionCacheContext =
partitionCaches.getIfPresent(key);
if (partitionCacheContext == null) {
- return Optional.of(loadCache(key, table));
+ return Optional.ofNullable(loadCache(key, table, scan));
}
if (table.getId() != partitionCacheContext.tableId
- || table.getVisibleVersion() !=
partitionCacheContext.tableVersion) {
+ || !Objects.equals(table.getPartitionMetaVersion(scan),
partitionCacheContext.partitionMetaVersion)) {
partitionCaches.invalidate(key);
- return Optional.of(loadCache(key, table));
+ return Optional.ofNullable(loadCache(key, table, scan));
}
return Optional.of(partitionCacheContext.sortedPartitionRanges);
}
- private SortedPartitionRanges<?> loadCache(TableIdentifier key, OlapTable
olapTable) {
- PartitionInfo partitionInfo = olapTable.getPartitionInfo();
- Map<Long, PartitionItem> allPartitions =
partitionInfo.getIdToItem(false);
- List<Entry<Long, PartitionItem>> sortedList =
Lists.newArrayList(allPartitions.entrySet());
- List<PartitionItemAndRange<?>> sortedRanges =
Lists.newArrayListWithCapacity(allPartitions.size());
+ private SortedPartitionRanges<?> loadCache(
+ TableIdentifier key, SupportBinarySearchFilteringPartitions table,
CatalogRelation scan) {
+ long now = System.currentTimeMillis();
+ long partitionMetaLoadTime =
table.getPartitionMetaLoadTimeMillis(scan);
+
+ // if insert too frequently, we will skip sort partitions
+ if (now <= partitionMetaLoadTime || (now - partitionMetaLoadTime) <=
(10 * 1000)) {
+ return null;
+ }
+
+ Map<?, PartitionItem> unsortedMap = table.getOriginPartitions(scan);
+ List<Entry<?, PartitionItem>> unsortedList =
Lists.newArrayList(unsortedMap.entrySet());
+ List<PartitionItemAndRange<?>> sortedRanges =
Lists.newArrayListWithCapacity(unsortedMap.size());
List<PartitionItemAndId<?>> defaultPartitions = Lists.newArrayList();
- for (Entry<Long, PartitionItem> entry : sortedList) {
+ for (Entry<?, PartitionItem> entry : unsortedList) {
PartitionItem partitionItem = entry.getValue();
- Long id = entry.getKey();
+ Object id = entry.getKey();
if (!partitionItem.isDefaultPartition()) {
List<Range<MultiColumnBound>> ranges =
PartitionItemToRange.toRanges(partitionItem);
for (Range<MultiColumnBound> range : ranges) {
@@ -118,7 +128,7 @@ public class NereidsSortedPartitionsCacheManager {
sortedRanges, defaultPartitions
);
PartitionCacheContext context = new PartitionCacheContext(
- olapTable.getId(), olapTable.getVisibleVersion(),
sortedPartitionRanges);
+ table.getId(), table.getPartitionMetaVersion(scan),
sortedPartitionRanges);
partitionCaches.put(key, context);
return sortedPartitionRanges;
}
@@ -166,20 +176,21 @@ public class NereidsSortedPartitionsCacheManager {
private static class PartitionCacheContext {
private final long tableId;
- private final long tableVersion;
+ private final Object partitionMetaVersion;
private final SortedPartitionRanges sortedPartitionRanges;
public PartitionCacheContext(
- long tableId, long tableVersion, SortedPartitionRanges
sortedPartitionRanges) {
+ long tableId, Object partitionMetaVersion,
SortedPartitionRanges sortedPartitionRanges) {
this.tableId = tableId;
- this.tableVersion = tableVersion;
+ this.partitionMetaVersion
+ = Objects.requireNonNull(partitionMetaVersion,
"partitionMetaVersion cannot be null");
this.sortedPartitionRanges = sortedPartitionRanges;
}
@Override
public String toString() {
return "PartitionCacheContext(tableId="
- + tableId + ", tableVersion=" + tableVersion
+ + tableId + ", tableVersion=" + partitionMetaVersion
+ ", partitionNum=" +
sortedPartitionRanges.sortedPartitions.size() + ")";
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
index e99906f5e13..04d8b8ffe65 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
@@ -17,13 +17,17 @@
package org.apache.doris.nereids.rules.rewrite;
+import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.PartitionItem;
+import org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
+import org.apache.doris.common.cache.NereidsSortedPartitionsCacheManager;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner;
import
org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType;
+import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan;
import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
@@ -36,6 +40,7 @@ import org.apache.commons.collections.CollectionUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -89,8 +94,17 @@ public class PruneFileScanPartition extends
OneRewriteRuleFactory {
.collect(Collectors.toList());
Map<String, PartitionItem> nameToPartitionItem =
scan.getSelectedPartitions().selectedPartitions;
+ Optional<SortedPartitionRanges<String>> sortedPartitionRanges =
Optional.empty();
+ if (externalTable instanceof SupportBinarySearchFilteringPartitions) {
+ NereidsSortedPartitionsCacheManager partitionsCacheManager =
Env.getCurrentEnv()
+ .getSortedPartitionsCacheManager();
+ sortedPartitionRanges = (Optional) partitionsCacheManager.get(
+ (SupportBinarySearchFilteringPartitions)
externalTable, scan);
+ }
+
List<String> prunedPartitions = new ArrayList<>(PartitionPruner.prune(
- partitionSlots, filter.getPredicate(), nameToPartitionItem,
ctx, PartitionTableType.EXTERNAL));
+ partitionSlots, filter.getPredicate(), nameToPartitionItem,
ctx,
+ PartitionTableType.EXTERNAL, sortedPartitionRanges));
for (String name : prunedPartitions) {
selectedPartitionItems.put(name, nameToPartitionItem.get(name));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
index c5868b4b68a..debf4e880cc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java
@@ -86,7 +86,8 @@ public class PruneOlapScanPartition extends
OneRewriteRuleFactory {
Map<Long, PartitionItem> idToPartitions;
Optional<SortedPartitionRanges<Long>> sortedPartitionRanges =
Optional.empty();
if (manuallySpecifiedPartitions.isEmpty()) {
- Optional<SortedPartitionRanges<?>> sortedPartitionRangesOpt =
sortedPartitionsCacheManager.get(table);
+ Optional<SortedPartitionRanges<?>> sortedPartitionRangesOpt
+ = sortedPartitionsCacheManager.get(table, scan);
if (sortedPartitionRangesOpt.isPresent()) {
sortedPartitionRanges = (Optional)
sortedPartitionRangesOpt;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]