fx19880617 commented on a change in pull request #5927:
URL: https://github.com/apache/incubator-pinot/pull/5927#discussion_r477566730



##########
File path: 
pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
##########
@@ -200,8 +199,30 @@ public void run()
       }
 
       List<String> pathAndIdxList = new ArrayList<>();
-      for (int i = 0; i < filteredFiles.size(); i++) {
-        pathAndIdxList.add(String.format("%s %d", filteredFiles.get(i), i));
+      String localDirectorySequenceIdString = 
_spec.getSegmentNameGeneratorSpec().getConfigs().get(LOCAL_DIRECTORY_SEQUENCE_ID);
+      boolean localDirectorySequenceId = false;
+      if (localDirectorySequenceIdString != null) {
+        localDirectorySequenceId = 
Boolean.parseBoolean(localDirectorySequenceIdString);
+      }
+      if (localDirectorySequenceId) {
+        Map<String, List<String>> localDirIndex = new HashMap<>();
+        for (String filteredFile : filteredFiles) {
+          Path filteredParentPath = Paths.get(filteredFile).getParent();
+          if (!localDirIndex.containsKey(filteredParentPath.toString())) {
+            localDirIndex.put(filteredParentPath.toString(), new 
ArrayList<>());
+          }
+          localDirIndex.get(filteredParentPath.toString()).add(filteredFile);
+        }
+        for (String parentPath: localDirIndex.keySet()){
+          List<String> siblingFiles = localDirIndex.get(parentPath);

Review comment:
       Suggest sorting the siblingFiles list, so that rerunning the segment 
creation job produces the same list of segments.

##########
File path: 
pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
##########
@@ -29,12 +30,10 @@
 import java.io.Serializable;
 import java.net.URI;
 import java.nio.file.FileSystems;
+import java.nio.file.Path;
 import java.nio.file.PathMatcher;
 import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.UUID;
+import java.util.*;

Review comment:
       Please expand the `.*` wildcard import into explicit imports.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to