szehon-ho commented on code in PR #12885:
URL: https://github.com/apache/iceberg/pull/12885#discussion_r2141270284


##########
core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java:
##########
@@ -312,7 +314,87 @@ public static RewriteResult<DataFile> rewriteDataManifest(
         ManifestReader<DataFile> reader =
             ManifestFiles.read(manifestFile, io, 
specsById).select(Arrays.asList("*"))) {
       return StreamSupport.stream(reader.entries().spliterator(), false)
-          .map(entry -> writeDataFileEntry(entry, spec, sourcePrefix, 
targetPrefix, writer))
+          .map(
+              entry ->
+                  writeDataFileEntry(entry, Set.of(), spec, sourcePrefix, 
targetPrefix, writer))
+          .reduce(new RewriteResult<>(), RewriteResult::append);
+    }
+  }
+
+  /**
+   * Rewrite a data manifest, replacing path references.
+   *
+   * @param manifestFile source manifest file to rewrite
+   * @param deltaSnapshotIds snapshot ids to filter manifest entry
+   * @param outputFile output file to rewrite manifest file to
+   * @param io file io
+   * @param format format of the manifest file
+   * @param specsById map of partition specs by id
+   * @param sourcePrefix source prefix that will be replaced
+   * @param targetPrefix target prefix that will replace it
+   * @return a copy plan of content files in the manifest that was rewritten
+   */
+  public static RewriteResult<DataFile> rewriteDataManifest(
+      ManifestFile manifestFile,
+      Set<Long> deltaSnapshotIds,
+      OutputFile outputFile,
+      FileIO io,
+      int format,
+      Map<Integer, PartitionSpec> specsById,
+      String sourcePrefix,
+      String targetPrefix)
+      throws IOException {
+    PartitionSpec spec = specsById.get(manifestFile.partitionSpecId());
+    try (ManifestWriter<DataFile> writer =
+            ManifestFiles.write(format, spec, outputFile, 
manifestFile.snapshotId());
+        ManifestReader<DataFile> reader =
+            ManifestFiles.read(manifestFile, io, 
specsById).select(Arrays.asList("*"))) {
+      return StreamSupport.stream(reader.entries().spliterator(), false)
+          .map(
+              entry ->
+                  writeDataFileEntry(
+                      entry, deltaSnapshotIds, spec, sourcePrefix, 
targetPrefix, writer))
+          .reduce(new RewriteResult<>(), RewriteResult::append);
+    }
+  }
+
+  /**
+   * Rewrite a delete manifest, replacing path references.
+   *
+   * @param manifestFile source delete manifest to rewrite
+   * @param outputFile output file to rewrite manifest file to
+   * @param io file io
+   * @param format format of the manifest file
+   * @param specsById map of partition specs by id
+   * @param sourcePrefix source prefix that will be replaced
+   * @param targetPrefix target prefix that will replace it
+   * @param stagingLocation staging location for rewritten files (referred 
delete file will be
+   *     rewritten here)
+   * @return a copy plan of content files in the manifest that was rewritten
+   * @deprecated since 1.9.0, will be removed in 2.0.0

Review Comment:
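   Same as the deprecation note on the other overload: this should say deprecated since 1.10, to be removed in 1.11.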



##########
core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java:
##########
@@ -404,8 +498,10 @@ private static RewriteResult<DeleteFile> 
writeDeleteFileEntry(
                 .withMetrics(metricsWithTargetPath)
                 .build();
         appendEntryWithFile(entry, writer, movedFile);
-        // keep deleted position delete entries but exclude them from copyPlan
-        if (entry.isLive()) {
+        // keep the following entries in metadata but exclude them from 
copyPlan
+        // 1) deleted position delete files
+        // 2) entries not changed by snapshots within the range

Review Comment:
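   Same wording suggestion as for the data file entries: "within the range" => "in the set of snapshotIds".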



##########
core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java:
##########
@@ -374,15 +465,18 @@ private static RewriteResult<DataFile> writeDataFileEntry(
     DataFile newDataFile =
         
DataFiles.builder(spec).copy(entry.file()).withPath(targetDataFilePath).build();
     appendEntryWithFile(entry, writer, newDataFile);
-    // keep deleted data file entries but exclude them from copyPlan
-    if (entry.isLive()) {
+    // keep the following entries in metadata but exclude them from copyPlan
+    // 1) deleted data files
+    // 2) entries not changed by snapshots within the range

Review Comment:
   Suggested rewording: "within the range" => "in the set of snapshotIds".



##########
core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java:
##########
@@ -296,7 +296,9 @@ private static List<ManifestFile> 
manifestFilesInSnapshot(FileIO io, Snapshot sn
    * @param sourcePrefix source prefix that will be replaced
    * @param targetPrefix target prefix that will replace it
    * @return a copy plan of content files in the manifest that was rewritten
+   * @deprecated since 1.9.0, will be removed in 2.0.0

Review Comment:
   This should say deprecated since 1.10, to be removed in 1.11.



##########
core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java:
##########
@@ -415,8 +511,10 @@ private static RewriteResult<DeleteFile> 
writeDeleteFileEntry(
       case EQUALITY_DELETES:
         DeleteFile eqDeleteFile = newEqualityDeleteEntry(file, spec, 
sourcePrefix, targetPrefix);
         appendEntryWithFile(entry, writer, eqDeleteFile);
-        // keep deleted equality delete entries but exclude them from copyPlan
-        if (entry.isLive()) {
+        // keep the following entries in metadata but exclude them from 
copyPlan
+        // 1) deleted equality delete files
+        // 2) entries not changed by snapshots within the range

Review Comment:
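   Same wording suggestion as for the data file entries: "within the range" => "in the set of snapshotIds".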



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to