yupeng9 commented on a change in pull request #6567:
URL: https://github.com/apache/incubator-pinot/pull/6567#discussion_r596473726



##########
File path: 
pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File 
dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest 
dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File 
tableDataDir, String segmentName)

Review comment:
       @npawar fyi, shall this be part of the `SegmentUploader` implementation?

##########
File path: 
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java
##########
@@ -172,6 +183,39 @@ private void checkReplication(InstancePartitions 
instancePartitions) {
     }
   }
 
+  private int getSegmentPartitionId(String segmentName) {
+    // A fast path if the segmentName is a LLC segment name and we can get the 
partition id from the name directly.
+    if (LLCSegmentName.isLowLevelConsumerSegmentName(segmentName)) {
+      return new LLCSegmentName(segmentName).getPartitionGroupId();
+    }
+    // Otherwise, retrieve the partition id from the segment zk metadata. 
Currently only realtime segments from upsert
+    // enabled tables have partition ids in their segment metadata.
+    RealtimeSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider
+        .getRealtimeSegmentZKMetadata(_helixManager.getHelixPropertyStore(), 
_realtimeTableName, segmentName);
+    Preconditions
+        .checkState(segmentZKMetadata != null, "Failed to find segment ZK 
metadata for segment: %s of table: %s",
+            segmentName, _realtimeTableName);
+    return getSegmentPartitionIdFromZkMetaData(segmentZKMetadata);
+  }
+
+  private int getSegmentPartitionIdFromZkMetaData(RealtimeSegmentZKMetadata 
segmentZKMetadata) {
+    String segmentName = segmentZKMetadata.getSegmentName();
+    Preconditions.checkState(segmentZKMetadata.getPartitionMetadata() != null,
+        "Segment ZK metadata for segment: %s of table: %s does not contain 
partition metadata", segmentName,
+        _realtimeTableName);
+
+    ColumnPartitionMetadata partitionMetadata =
+        
segmentZKMetadata.getPartitionMetadata().getColumnPartitionMap().get(_partitionColumn);
+    Preconditions.checkState(partitionMetadata != null,
+        "Segment ZK metadata for segment: %s of table: %s does not contain 
partition metadata for column: %s",

Review comment:
       shall we make the message more informative, by adding `checking if the 
table is in upsert mode`?

##########
File path: 
pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File 
dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest 
dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File 
tableDataDir, String segmentName)
+      throws Exception {
+    File tempDir = new File(tableDataDir, "tmp-" + segmentName + "-" + 
UUID.randomUUID());
+    FileUtils.forceMkdir(tempDir);
+    File tempTarFile = new File(tempDir, segmentName + TAR_GZ_SUFFIX);
+    File tempSegmentDir = new File(tempDir, segmentName);
+    try {
+      try {
+        SegmentFetcherFactory.fetchSegmentToLocal(uri, tempTarFile);
+        LOGGER.info("Downloaded tarred segment: {} from: {} to: {}, file 
length: {}", segmentName, uri, tempTarFile,
+            tempTarFile.length());
+      } catch (AttemptsExceededException e) {
+        LOGGER.error("Attempts exceeded when downloading segment: {} from: {} 
to: {}", segmentName, uri,
+            tempTarFile, e);
+        Utils.rethrowException(e);
+      }
+
+      try {
+        // If an exception is thrown when untarring, it means the tar file is 
broken OR not found after the retry.
+        // Thus, there's no need to retry again.
+        File tempIndexDir = TarGzCompressionUtils.untar(tempTarFile, 
tempSegmentDir).get(0);
+        File segmentDir = new File(tableDataDir, segmentName);
+        if (segmentDir.exists()) {
+          LOGGER.info("Deleting existing index directory for segment: {}", 
segmentName);
+          FileUtils.deleteDirectory(segmentDir);
+        }
+        FileUtils.moveDirectory(tempIndexDir, segmentDir);
+        LOGGER.info("Successfully downloaded segment: {} to: {}", segmentName, 
segmentDir);
+      } catch (Exception e) {
+        LOGGER.error("Exception when untarring segment: {} for from {} to {}", 
segmentName, tempTarFile, tempSegmentDir,

Review comment:
       no need for `for` in the msg?

##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
##########
@@ -543,4 +556,41 @@ private boolean isValid(Schema schema, IndexingConfig 
indexingConfig) {
     }
     return isValid;
   }
+
+  private int getSegmentPartitionId(String segmentName, String tableName) {

Review comment:
       this logic is repeated in `RealtimeSegmentAssignment.java`?

##########
File path: 
pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
##########
@@ -358,7 +358,7 @@
   public static class Segment {
     public static class Realtime {
       public enum Status {
-        IN_PROGRESS, DONE
+        IN_PROGRESS, DONE, UPLOAD

Review comment:
       +1 to @mcvsubbu's suggestion. @chenboat is this status/flow documented 
somewhere in the design doc? 

##########
File path: 
pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File 
dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest 
dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File 
tableDataDir, String segmentName)
+      throws Exception {
+    File tempDir = new File(tableDataDir, "tmp-" + segmentName + "-" + 
UUID.randomUUID());
+    FileUtils.forceMkdir(tempDir);
+    File tempTarFile = new File(tempDir, segmentName + TAR_GZ_SUFFIX);
+    File tempSegmentDir = new File(tempDir, segmentName);
+    try {
+      try {
+        SegmentFetcherFactory.fetchSegmentToLocal(uri, tempTarFile);
+        LOGGER.info("Downloaded tarred segment: {} from: {} to: {}, file 
length: {}", segmentName, uri, tempTarFile,
+            tempTarFile.length());
+      } catch (AttemptsExceededException e) {
+        LOGGER.error("Attempts exceeded when downloading segment: {} from: {} 
to: {}", segmentName, uri,
+            tempTarFile, e);
+        Utils.rethrowException(e);
+      }
+
+      try {
+        // If an exception is thrown when untarring, it means the tar file is 
broken OR not found after the retry.
+        // Thus, there's no need to retry again.
+        File tempIndexDir = TarGzCompressionUtils.untar(tempTarFile, 
tempSegmentDir).get(0);
+        File segmentDir = new File(tableDataDir, segmentName);
+        if (segmentDir.exists()) {
+          LOGGER.info("Deleting existing index directory for segment: {}", 
segmentName);
+          FileUtils.deleteDirectory(segmentDir);
+        }
+        FileUtils.moveDirectory(tempIndexDir, segmentDir);
+        LOGGER.info("Successfully downloaded segment: {} to: {}", segmentName, 
segmentDir);
+      } catch (Exception e) {
+        LOGGER.error("Exception when untarring segment: {} for from {} to {}", 
segmentName, tempTarFile, tempSegmentDir,
+            e);
+        Utils.rethrowException(e);
+      }
+    } finally {
+      FileUtils.deleteQuietly(tempDir);

Review comment:
       the tmp files might not be cleaned up in the process crashed before 
hitting `finally`.  Do we have some mechanism to recycle zombie files?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to