zhaorongsheng commented on code in PR #50424:
URL: https://github.com/apache/doris/pull/50424#discussion_r2080738751


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java:
##########
@@ -49,65 +56,145 @@
  * Used to prune partition of olap scan, should execute after 
SwapProjectAndFilter, MergeConsecutiveFilters,
  * MergeConsecutiveProjects and all predicate push down related rules.
  */
-public class PruneOlapScanPartition extends OneRewriteRuleFactory {
+public class PruneOlapScanPartition implements RewriteRuleFactory {
 
     @Override
-    public Rule build() {
-        return logicalFilter(logicalOlapScan()).when(p -> 
!p.child().isPartitionPruned()).thenApply(ctx -> {
-            LogicalFilter<LogicalOlapScan> filter = ctx.root;
-            LogicalOlapScan scan = filter.child();
-            OlapTable table = scan.getTable();
-            Set<String> partitionColumnNameSet = 
Utils.execWithReturnVal(table::getPartitionColumnNames);
-            if (partitionColumnNameSet.isEmpty()) {
-                return null;
-            }
-            List<Slot> output = scan.getOutput();
-            PartitionInfo partitionInfo = table.getPartitionInfo();
-            List<Column> partitionColumns = 
partitionInfo.getPartitionColumns();
-            List<Slot> partitionSlots = new 
ArrayList<>(partitionColumns.size());
-            for (Column column : partitionColumns) {
-                Slot partitionSlot = null;
-                // loop search is faster than build a map
-                for (Slot slot : output) {
-                    if (slot.getName().equalsIgnoreCase(column.getName())) {
-                        partitionSlot = slot;
-                        break;
-                    }
-                }
-                if (partitionSlot == null) {
-                    return null;
-                } else {
-                    partitionSlots.add(partitionSlot);
+    public List<Rule> buildRules() {
+        return ImmutableList.of(
+                logicalOlapScan()
+                    .when(scan -> !scan.isPartitionPruned()
+                            && !scan.getManuallySpecifiedTabletIds().isEmpty()
+                            && scan.getTable().isPartitionedTable()
+                    )
+                    .thenApply(ctx -> {
+                        // Case1: sql without filter condition, e.g. SELECT * 
FROM tbl (${tabletID})
+                        LogicalOlapScan scan = ctx.root;
+                        OlapTable table = scan.getTable();
+                        return prunePartition(scan, table, null, ctx);
+                    }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE),
+                logicalFilter(logicalOlapScan())
+                    .when(p -> !p.child().isPartitionPruned())
+                    .thenApply(ctx -> {
+                        // Case2: sql with filter condition, e.g. SELECT * 
FROM tbl (${tabletID}) WHERE part_column='x'
+                        LogicalFilter<LogicalOlapScan> filter = ctx.root;
+                        LogicalOlapScan scan = filter.child();
+                        OlapTable table = scan.getTable();
+                        LogicalRelation rewrittenLogicalRelation = 
prunePartition(scan, table, filter, ctx);
+                        if (rewrittenLogicalRelation == null) {
+                            return null;
+                        }
+                        if (rewrittenLogicalRelation instanceof 
LogicalEmptyRelation) {
+                            return rewrittenLogicalRelation;
+                        } else {
+                            LogicalOlapScan rewrittenScan = (LogicalOlapScan) 
rewrittenLogicalRelation;
+                            return 
filter.withChildren(ImmutableList.of(rewrittenScan));
+                        }
+                    }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE)
+        );
+    }
+
+    private LogicalRelation prunePartition(LogicalOlapScan scan,
+                                      OlapTable table,
+                                      LogicalFilter filter,
+                                      MatchingContext ctx) {
+        List<Long> prunedPartitionsByFilters = prunePartitionByFilters(scan, 
table, filter, ctx);
+        List<Long> prunedPartitions = prunePartitionByTabletIds(scan, table, 
prunedPartitionsByFilters);
+        if (prunedPartitions == null) {
+            return null;
+        }
+        if (prunedPartitions.isEmpty()) {
+            return new LogicalEmptyRelation(
+                ConnectContext.get().getStatementContext().getNextRelationId(),
+                ctx.root.getOutput());
+        }
+        return scan.withSelectedPartitionIds(prunedPartitions);
+    }
+
+    private List<Long> prunePartitionByFilters(LogicalOlapScan scan,
+                                               OlapTable table,
+                                               LogicalFilter filter,
+                                               MatchingContext ctx) {
+        Set<String> partitionColumnNameSet = 
Utils.execWithReturnVal(table::getPartitionColumnNames);
+        if (partitionColumnNameSet.isEmpty()) {
+            return null;
+        }
+        List<Slot> output = scan.getOutput();
+        PartitionInfo partitionInfo = table.getPartitionInfo();
+        List<Column> partitionColumns = partitionInfo.getPartitionColumns();
+        List<Slot> partitionSlots = new ArrayList<>(partitionColumns.size());
+        for (Column column : partitionColumns) {
+            Slot partitionSlot = null;
+            // loop search is faster than build a map
+            for (Slot slot : output) {
+                if (slot.getName().equalsIgnoreCase(column.getName())) {
+                    partitionSlot = slot;
+                    break;
                 }
             }
-            NereidsSortedPartitionsCacheManager sortedPartitionsCacheManager = 
Env.getCurrentEnv()
-                    .getSortedPartitionsCacheManager();
-            List<Long> manuallySpecifiedPartitions = 
scan.getManuallySpecifiedPartitions();
-            Map<Long, PartitionItem> idToPartitions;
-            Optional<SortedPartitionRanges<Long>> sortedPartitionRanges = 
Optional.empty();
-            if (manuallySpecifiedPartitions.isEmpty()) {
-                Optional<SortedPartitionRanges<?>> sortedPartitionRangesOpt
-                        = sortedPartitionsCacheManager.get(table, scan);
-                if (sortedPartitionRangesOpt.isPresent()) {
-                    sortedPartitionRanges = (Optional) 
sortedPartitionRangesOpt;
-                }
-                idToPartitions = partitionInfo.getIdToItem(false);
+            if (partitionSlot == null) {
+                return null;
             } else {
-                Map<Long, PartitionItem> allPartitions = 
partitionInfo.getAllPartitions();
-                idToPartitions = allPartitions.keySet().stream()
-                        .filter(manuallySpecifiedPartitions::contains)
-                        .collect(Collectors.toMap(Function.identity(), 
allPartitions::get));
+                partitionSlots.add(partitionSlot);
+            }
+        }
+        NereidsSortedPartitionsCacheManager sortedPartitionsCacheManager = 
Env.getCurrentEnv()
+                .getSortedPartitionsCacheManager();
+        List<Long> manuallySpecifiedPartitions = 
scan.getManuallySpecifiedPartitions();
+        Map<Long, PartitionItem> idToPartitions;
+        Optional<SortedPartitionRanges<Long>> sortedPartitionRanges = 
Optional.empty();
+        if (manuallySpecifiedPartitions.isEmpty()) {
+            Optional<SortedPartitionRanges<?>> sortedPartitionRangesOpt
+                    = sortedPartitionsCacheManager.get(table, scan);
+            if (sortedPartitionRangesOpt.isPresent()) {
+                sortedPartitionRanges = (Optional) sortedPartitionRangesOpt;
             }
+            idToPartitions = partitionInfo.getIdToItem(false);
+        } else {
+            Map<Long, PartitionItem> allPartitions = 
partitionInfo.getAllPartitions();
+            idToPartitions = allPartitions.keySet().stream()
+                    .filter(manuallySpecifiedPartitions::contains)
+                    .collect(Collectors.toMap(Function.identity(), 
allPartitions::get));
+        }
+        if (filter != null) {
             List<Long> prunedPartitions = PartitionPruner.prune(
                     partitionSlots, filter.getPredicate(), idToPartitions, 
ctx.cascadesContext,
                     PartitionTableType.OLAP, sortedPartitionRanges);
-            if (prunedPartitions.isEmpty()) {
-                return new LogicalEmptyRelation(
-                        
ConnectContext.get().getStatementContext().getNextRelationId(),
-                        filter.getOutput());
+            return prunedPartitions;
+        } else if (!manuallySpecifiedPartitions.isEmpty()) {
+            return Utils.fastToImmutableList(idToPartitions.keySet());
+        } else {
+            return null;
+        }
+    }
+
+    private List<Long> prunePartitionByTabletIds(LogicalOlapScan scan,
+                                                 OlapTable table,
+                                                 List<Long> 
prunedPartitionsByFilters) {
+        if (scan.getManuallySpecifiedTabletIds().size() == 0
+                || (prunedPartitionsByFilters != null && 
prunedPartitionsByFilters.isEmpty())) {
+            // `prunedPartitionsByFilters is not null and is empty` means 
empty partitions after pruner
+            return prunedPartitionsByFilters;
+        }
+
+        Set<Long> selectedPartitions = new LinkedHashSet<>();
+        if (prunedPartitionsByFilters != null) {
+            selectedPartitions.addAll(prunedPartitionsByFilters);
+        }
+
+        Set<Long> manuallySpecifiedTabletIds = 
ImmutableSet.copyOf(scan.getManuallySpecifiedTabletIds());
+        List<Long> selectPartitionIds = new ArrayList<>();
+        for (Partition partition : table.getPartitions()) {
+            if (!selectedPartitions.isEmpty() && 
!selectedPartitions.contains(partition.getId())) {
+                continue;
+            }

Review Comment:
   The type of `prunedPartitionsByFilters` is `List`, so calling 
`contains()` on it may be too expensive when the list is long.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to