klsince commented on code in PR #15142: URL: https://github.com/apache/pinot/pull/15142#discussion_r1999439274
########## pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java: ########## @@ -189,6 +219,110 @@ private boolean shouldDeleteInProgressLLCSegment(String segmentName, IdealState } } + private List<String> getSegmentsToDeleteFromDeepstore(String tableNameWithType, RetentionStrategy retentionStrategy, + List<SegmentZKMetadata> segmentZKMetadataList, int untrackedSegmentsDeletionBatchSize) { + List<String> segmentsToDelete = new ArrayList<>(); + + if (untrackedSegmentsDeletionBatchSize <= 0) { + // return an empty list in case untracked segment deletion is not configured + LOGGER.info( + "Not scanning deep store for untracked segments for table: {} as untrackedSegmentsDeletionBatchSize is set " + + "to: {}", + tableNameWithType, untrackedSegmentsDeletionBatchSize); + return segmentsToDelete; + } + + List<String> segmentsPresentInZK = + segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()); + try { + LOGGER.info("Fetch segments present in deep store that are beyond retention period for table: {}", + tableNameWithType); + segmentsToDelete = + findUntrackedSegmentsToDeleteFromDeepstore(tableNameWithType, retentionStrategy, segmentsPresentInZK); + _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.UNTRACKED_SEGMENTS_COUNT, + segmentsToDelete.size()); + + if (segmentsToDelete.size() > untrackedSegmentsDeletionBatchSize) { Review Comment: +1 to stop listing once batch size is reached, perhaps leave a TODO here ########## pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java: ########## @@ -190,7 +190,11 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { REINGESTED_SEGMENT_UPLOADS_IN_PROGRESS("reingestedSegmentUploadsInProgress", true), // Resource utilization is within limits or not for a table - RESOURCE_UTILIZATION_LIMIT_EXCEEDED("ResourceUtilizationLimitExceeded", false); + 
RESOURCE_UTILIZATION_LIMIT_EXCEEDED("ResourceUtilizationLimitExceeded", false), Review Comment: nit: lowercase the first letter of the metric name (`resourceUtilizationLimitExceeded`) for consistency with the other gauge names -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org