vaultah commented on code in PR #13720:
URL: https://github.com/apache/iceberg/pull/13720#discussion_r2286620778


##########
spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteTablePathsAction.java:
##########
@@ -1209,6 +1213,191 @@ public void testNestedDirectoryStructurePreservation() 
throws Exception {
     assertThat(targetPath2).startsWith(targetTableLocation());
   }
 
+  @Test
+  public void testFullRewriteUpdatesAllManifestLengthsInManifestList(
+      @TempDir Path rootTargetLocation) throws Exception {
+    String location = newTableLocation();
+    Table sourceTable = createTableWithSnapshots(location, 10);
+
+    addAnyPositionDelete(
+        sourceTable, removePrefix(sourceTable.location() + 
"/data/deeply/nested/deletes.parquet"));
+
+    Map<String, Long> manifestSizesBeforeRewrite =
+        sourceTable.currentSnapshot().allManifests(sourceTable.io()).stream()
+            .collect(Collectors.toMap(m -> fileName(m.path()), m -> 
m.length()));
+
+    // Rewrite table metadata to a location that's much longer than the 
original in order
+    // to make manifests larger
+    String targetLocation = toAbsolute(rootTargetLocation) + 
generateLongNestedPath(25);
+    RewriteTablePath.Result result =
+        actions()
+            .rewriteTablePath(sourceTable)
+            .rewriteLocationPrefix(newTableLocation(), targetLocation)
+            .execute();
+
+    // 1 + 11 JSON metadata files, 11 snapshots, 11 manifests, 10 data files, 
1 delete file
+    checkFileNum(12, 11, 11, 45, result);
+    copyTableFiles(result);
+
+    Table targetTable = TABLES.load(targetLocation);
+
+    // We have rewritten all 11 snapshots. Make sure all sizes were correctly 
updated
+    // across all manifest lists
+    assertThat(targetTable.snapshots())
+        .allSatisfy(
+            snapshot ->
+                assertThat(snapshot.allManifests(targetTable.io()))
+                    .allSatisfy(
+                        manifest -> {
+                          String manifestName = fileName(manifest.path());
+                          assertThat(manifest.length())
+                              
.isNotEqualTo(manifestSizesBeforeRewrite.get(manifestName));
+                        })
+                    .allSatisfy(
+                        manifest -> {
+                          
assertThat(targetTable.io().newInputFile(manifest.path()).getLength())
+                              .isEqualTo(manifest.length());
+                        }));
+  }
+
+  @Test
+  public void testPartialRewriteUpdatesDataManifestLengthInManifestList(
+      @TempDir Path rootTargetLocation) throws Exception {
+    String location = newTableLocation();
+    Table sourceTable = createTableWithSnapshots(location, 10);
+
+    Map<String, Long> manifestSizesBeforeRewrite =
+        sourceTable.currentSnapshot().allManifests(sourceTable.io()).stream()
+            .collect(Collectors.toMap(m -> fileName(m.path()), m -> 
m.length()));
+
+    // Rewrite just the latest table version to a location that's much longer 
than
+    // the original in order to make manifests larger
+    String targetLocation = toAbsolute(rootTargetLocation) + 
generateLongNestedPath(25);
+    RewriteTablePath.Result result =
+        actions()
+            .rewriteTablePath(sourceTable)
+            .rewriteLocationPrefix(newTableLocation(), targetLocation)
+            .startVersion("v10.metadata.json")
+            .execute();
+
+    // 1 metadata JSON file, 1 snapshot, 10 manifests, 1 data file
+    checkFileNum(1, 1, 10, 13, result);
+    copyTableFiles(result);
+
+    Table targetTable = TABLES.load(targetLocation);
+    Snapshot lastSnapshot = targetTable.currentSnapshot();
+
+    List<ManifestFile> allManifests = 
lastSnapshot.allManifests(targetTable.io());
+    ManifestFile rewrittenManifest =
+        Iterables.getOnlyElement(
+            allManifests.stream()
+                .filter(manifest -> manifest.snapshotId() == 
lastSnapshot.snapshotId())
+                .collect(Collectors.toList()));
+
+    // We have rewritten all manifests in one snapshot. Make sure all sizes 
were correctly
+    // updated in the manifest list
+    assertThat(targetTable.currentSnapshot().allManifests(targetTable.io()))
+        .allSatisfy(
+            manifest -> {
+              String manifestName = fileName(manifest.path());
+              assertThat(manifest.length())
+                  .isNotEqualTo(manifestSizesBeforeRewrite.get(manifestName));
+            })
+        .allSatisfy(
+            manifest -> {
+              
assertThat(targetTable.io().newInputFile(manifest.path()).getLength())
+                  .isEqualTo(manifest.length());
+            });
+  }
+
+  @Test
+  public void testPartialRewriteUpdatesDeleteManifestLengthInManifestList(
+      @TempDir Path rootTargetLocation) throws Exception {
+    String location = newTableLocation();
+    Table sourceTable = createTableWithSnapshots(location, 5);
+
+    // Add two more snapshots with just position deletes
+    addAnyPositionDelete(
+        sourceTable,
+        removePrefix(sourceTable.location() + 
"/data/deeply/nested/deletes-1.parquet"));
+    addAnyPositionDelete(
+        sourceTable,
+        removePrefix(sourceTable.location() + 
"/data/deeply/nested/deletes-2.parquet"));
+
+    Map<String, Long> manifestSizesBeforeRewrite =
+        sourceTable.currentSnapshot().allManifests(sourceTable.io()).stream()
+            .collect(Collectors.toMap(m -> fileName(m.path()), m -> 
m.length()));
+
+    // Rewrite just the latest table version to a location that's much longer 
than
+    // the original in order to make manifests larger
+    String targetLocation = toAbsolute(rootTargetLocation) + 
generateLongNestedPath(25);
+    RewriteTablePath.Result result =
+        actions()
+            .rewriteTablePath(sourceTable)
+            .rewriteLocationPrefix(newTableLocation(), targetLocation)
+            .startVersion("v7.metadata.json")
+            .execute();
+
+    // 1 metadata JSON file, 1 snapshot, 5 + 2 manifests, 1 delete file
+    checkFileNum(1, 1, 7, 10, result);
+    copyTableFiles(result);
+
+    Table targetTable = TABLES.load(targetLocation);
+    Snapshot lastSnapshot = targetTable.currentSnapshot();
+
+    List<ManifestFile> allManifests = 
lastSnapshot.allManifests(targetTable.io());
+    ManifestFile rewrittenManifest =
+        Iterables.getOnlyElement(
+            allManifests.stream()
+                .filter(manifest -> manifest.snapshotId() == 
lastSnapshot.snapshotId())
+                .collect(Collectors.toList()));
+
+    // We have rewritten all manifests in one snapshot. Make sure all sizes 
were correctly
+    // updated in the manifest list
+    assertThat(targetTable.currentSnapshot().allManifests(targetTable.io()))
+        .allSatisfy(
+            manifest -> {
+              String manifestName = fileName(manifest.path());
+              assertThat(manifest.length())
+                  .isNotEqualTo(manifestSizesBeforeRewrite.get(manifestName));
+            })
+        .allSatisfy(
+            manifest -> {
+              
assertThat(targetTable.io().newInputFile(manifest.path()).getLength())
+                  .isEqualTo(manifest.length());
+            });
+  }
+
+  protected static String generateLongNestedPath(int depth) {
+    StringBuilder pathBuilder = new StringBuilder();
+    for (int i = 1; i <= depth; i++) {
+      pathBuilder.append(String.format("/%03d", i));
+    }
+    pathBuilder.append("/");
+    return pathBuilder.toString();
+  }
+
+  protected void addAnyPositionDelete(Table targetTable, String path) throws 
Exception {

Review Comment:
   Renamed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to