krishan1390 commented on code in PR #16249: URL: https://github.com/apache/pinot/pull/16249#discussion_r2191785562
########## pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java: ########## @@ -1424,6 +1424,12 @@ public static class Minion { public static final char TASK_LIST_SEPARATOR = ','; public static final String CONFIG_OF_ALLOW_DOWNLOAD_FROM_SERVER = "pinot.minion.task.allow.download.from.server"; public static final String DEFAULT_ALLOW_DOWNLOAD_FROM_SERVER = "false"; + + /** + * The number of threads to use for downloading segments from the deepstore. + * This is a global setting that applies to all tasks of BaseMultipleSegmentsConversionExecutor class. + */ + public static final String SEGMENT_DOWNLOAD_THREAD_POOL_SIZE = "pinot.minion.task.segmentDownloadThreadPoolSize"; Review Comment: Can this be moved to MinionConf itself, where the other such minion confs are defined? ########## pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java: ########## @@ -352,6 +349,75 @@ public List<SegmentConversionResult> executeTask(PinotTaskConfig pinotTaskConfig } } + private int getThreadPoolSize(Map<String, String> taskConfigs) { + int nThreads = _minionConf.getProperty(Minion.SEGMENT_DOWNLOAD_THREAD_POOL_SIZE, + MinionConstants.DEFAULT_SEGMENT_DOWNLOAD_THREAD_POOL_SIZE); + nThreads = Integer.parseInt(taskConfigs.getOrDefault(MinionConstants.SEGMENT_DOWNLOAD_THREAD_POOL_SIZE, + String.valueOf(nThreads))); + return nThreads; + } + + private void parallelDownloadAndUntarSegments(int nThreads, String tableNameWithType, String taskType, + String[] segmentNames, String[] downloadURLs, File tempDataDir, AtomicInteger recordCounter, + List<File> inputSegmentDirs) + throws Exception { + + ExecutorService executorService = null; + int length = downloadURLs.length; + try { + executorService = Executors.newFixedThreadPool(nThreads); + List<Future<Void>> futures = new ArrayList<>(length); + for (int i = 0; i < length; i++) { + int index = i; + 
futures.add(executorService.submit(() -> { + downloadAndUntarSegment(tableNameWithType, taskType, segmentNames[index], downloadURLs[index], + tempDataDir, index, recordCounter, inputSegmentDirs); + return null; + })); + + // Wait for all downloads to complete and cancel other tasks if any download fails + for (Future<Void> future : futures) { Review Comment: Shouldn't this be outside the for loop above? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org