This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 4af0c8f153 Fixing the logic to only process deep store untracked segments in the offline table part for a hybrid table (#16107) 4af0c8f153 is described below commit 4af0c8f15367f2e0371d1cf2daa2fc4b9fd13a0a Author: Xiang Fu <xiangfu.1...@gmail.com> AuthorDate: Sun Jun 15 03:21:55 2025 +0800 Fixing the logic to only process deep store untracked segments in the offline table part for a hybrid table (#16107) --- .../helix/core/retention/RetentionManager.java | 29 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java index e139a93cc6..984bfbd527 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java @@ -301,6 +301,18 @@ public class RetentionManager extends ControllerPeriodicTask<Void> { private List<String> getSegmentsToDeleteFromDeepstore(String tableNameWithType, RetentionStrategy retentionStrategy, List<SegmentZKMetadata> segmentZKMetadataList, int untrackedSegmentsDeletionBatchSize) { List<String> segmentsToDelete = new ArrayList<>(); + String rawTableName = TableNameBuilder.extractRawTableName(tableNameWithType); + boolean isHybridTable = _pinotHelixResourceManager.hasOfflineTable(rawTableName) + && _pinotHelixResourceManager.hasRealtimeTable(rawTableName); + if (isHybridTable && TableNameBuilder.isRealtimeTableResource(tableNameWithType)) { + // If it is a hybrid table, we don't need to scan deep store for untracked segments when processing the + // realtime table. + // This is because realtime tables are expected to have short retention periods, so scanning deep store for + // untracked segments is not necessary. + LOGGER.info("Skipping deep store scan for untracked segments for realtime table: {} as it's a hybrid table", + tableNameWithType); + return segmentsToDelete; + } if (!_untrackedSegmentDeletionEnabled) { LOGGER.info( @@ -317,8 +329,20 @@ public class RetentionManager extends ControllerPeriodicTask<Void> { return segmentsToDelete; } - List<String> segmentsPresentInZK = - segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()); + List<String> segmentsPresentInZK; + if (isHybridTable) { + segmentsPresentInZK = new ArrayList<>(); + // This must be the OFFLINE table + segmentsPresentInZK.addAll( + segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList())); + // Add segments from the REALTIME table as well + segmentsPresentInZK.addAll( + _pinotHelixResourceManager.getSegmentsFor(TableNameBuilder.REALTIME.tableNameWithType(rawTableName), false)); + } else { + segmentsPresentInZK = + segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()); + } + try { LOGGER.info("Fetch segments present in deep store that are beyond retention period for table: {}", tableNameWithType); @@ -340,7 +364,6 @@ public class RetentionManager extends ControllerPeriodicTask<Void> { return segmentsToDelete; } - /** * Identifies segments in deepstore that are ready for deletion based on the retention strategy. * --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org