ajantha-bhat commented on code in PR #9822:
URL: https://github.com/apache/iceberg/pull/9822#discussion_r1505826283


##########
core/src/test/java/org/apache/iceberg/TestOverwrite.java:
##########
@@ -312,4 +313,154 @@ public void testValidatedOverwriteWithAppendSuccess() {
     Assert.assertEquals(
         "Should not create a new snapshot", baseId, latestSnapshot(base, 
branch).snapshotId());
   }
+
+  @Test
+  public void testOverwriteWithEmptyTableAppendManifest() throws IOException {
+    Assert.assertEquals("Table should start empty", 0, 
listManifestFiles().size());
+
+    TableMetadata base = readMetadata();
+    Assert.assertNull("Should not have a current snapshot", 
latestSnapshot(base, branch));

Review Comment:
   Please use `AssertJ` framework.
   https://iceberg.apache.org/contribute/#assertj



##########
core/src/test/java/org/apache/iceberg/TestOverwrite.java:
##########
@@ -312,4 +313,154 @@ public void testValidatedOverwriteWithAppendSuccess() {
     Assert.assertEquals(
         "Should not create a new snapshot", baseId, latestSnapshot(base, 
branch).snapshotId());
   }
+
+  @Test
+  public void testOverwriteWithEmptyTableAppendManifest() throws IOException {
+    Assert.assertEquals("Table should start empty", 0, 
listManifestFiles().size());
+
+    TableMetadata base = readMetadata();
+    Assert.assertNull("Should not have a current snapshot", 
latestSnapshot(base, branch));
+    Assert.assertEquals("Last sequence number should be 0", 0, 
base.lastSequenceNumber());
+
+    ManifestFile manifest = writeManifest(FILE_A, FILE_B);
+
+    commit(super.table, super.table.newOverwrite().appendManifest(manifest), 
branch);
+
+    Snapshot committedSnapshot = latestSnapshot(super.table, branch);
+    Assert.assertNotNull("Should create a snapshot", 
latestSnapshot(super.table, branch));
+    V1Assert.assertEquals(
+        "Last sequence number should be 0", 0, 
super.table.ops().current().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Last sequence number should be 1", 1, 
super.table.ops().current().lastSequenceNumber());
+    Assert.assertEquals(
+        "Should create 1 manifest for initial write",
+        1,
+        committedSnapshot.allManifests(super.table.io()).size());
+
+    long overwriteId = committedSnapshot.snapshotId();
+    validateManifest(
+        committedSnapshot.allManifests(table.io()).get(0),
+        dataSeqs(1L, 1L),
+        fileSeqs(1L, 1L),
+        ids(overwriteId, overwriteId),
+        files(FILE_A, FILE_B),
+        statuses(Status.ADDED, Status.ADDED));
+
+    // validate that the metadata summary is correct when using appendManifest
+    Assert.assertEquals(
+        "Summary metadata should include 2 added files",
+        "2",
+        committedSnapshot.summary().get("added-data-files"));
+  }
+
+  @Test
+  public void testOverwriteWithAddFileAndAppendManifest() throws IOException {
+    // merge all manifests for this test
+    super.table.updateProperties().set("commit.manifest.min-count-to-merge", 
"1").commit();
+
+    Assert.assertEquals("Table should start empty", 0, 
listManifestFiles().size());
+
+    TableMetadata base = readMetadata();
+    Assert.assertNull("Should not have a current snapshot", 
latestSnapshot(base, branch));
+    Assert.assertEquals("Last sequence number should be 0", 0, 
base.lastSequenceNumber());
+
+    ManifestFile manifest = writeManifest(FILE_A, FILE_B);
+
+    commit(
+        super.table,
+        
super.table.newOverwrite().addFile(FILE_C).addFile(FILE_D).appendManifest(manifest),
+        branch);
+
+    Snapshot committedSnapshot = latestSnapshot(super.table, branch);
+    Assert.assertNotNull("Should create a snapshot", 
latestSnapshot(super.table, branch));
+    V1Assert.assertEquals(
+        "Last sequence number should be 0", 0, 
super.table.ops().current().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Last sequence number should be 1", 1, 
super.table.ops().current().lastSequenceNumber());
+
+    long snapshotId = committedSnapshot.snapshotId();
+
+    Assert.assertEquals(
+        "Should create 1 merged manifest",
+        1,
+        committedSnapshot.allManifests(super.table.io()).size());
+
+    validateManifest(
+        committedSnapshot.allManifests(super.table.io()).get(0),
+        dataSeqs(1L, 1L, 1L, 1L),
+        fileSeqs(1L, 1L, 1L, 1L),
+        ids(snapshotId, snapshotId, snapshotId, snapshotId),
+        files(FILE_C, FILE_D, FILE_A, FILE_B),
+        statuses(Status.ADDED, Status.ADDED, Status.ADDED, Status.ADDED));
+  }
+
+  @Test
+  public void testOverwriteWithDeleteFileAndAppendManifest() throws 
IOException {
+    // merge all manifests for this test
+    super.table.updateProperties().set("commit.manifest.min-count-to-merge", 
"1").commit();
+
+    Assert.assertNull("Should not have a current snapshot", 
latestSnapshot(readMetadata(), branch));
+    Assert.assertEquals("Table should start empty", 0, 
listManifestFiles().size());
+
+    commit(
+        super.table,
+        super.table.newOverwrite().addFile(FILE_C).addFile(FILE_D),
+        branch); // Added data file , FILE_C and FILE_D
+
+    Assert.assertNotNull("Should create a snapshot", 
latestSnapshot(super.table, branch));
+    V1Assert.assertEquals(
+        "Last sequence number should be 0", 0, 
super.table.ops().current().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Last sequence number should be 1", 1, 
super.table.ops().current().lastSequenceNumber());
+
+    Snapshot commitBefore = latestSnapshot(super.table, branch);
+    long baseId = commitBefore.snapshotId();
+    validateSnapshot(null, commitBefore, 1, FILE_C, FILE_D);
+
+    Assert.assertEquals(
+        "Should create 1 manifest for initial write",
+        1,
+        commitBefore.allManifests(table.io()).size());
+    ManifestFile initialManifest = 
commitBefore.allManifests(table.io()).get(0);
+    validateManifest(
+        initialManifest,
+        dataSeqs(1L, 1L),
+        fileSeqs(1L, 1L),
+        ids(baseId, baseId),
+        files(FILE_C, FILE_D),
+        statuses(Status.ADDED, Status.ADDED));
+
+    ManifestFile manifest = writeManifest(FILE_A, FILE_B); // Added Data File 
FILE_A, FILE_B
+    commit(
+        super.table,
+        super.table
+            .newOverwrite()
+            .appendManifest(manifest)
+            .deleteFile(FILE_C), // Delete Data File FILE_C
+        branch);
+
+    V1Assert.assertEquals(
+        "Last sequence number should be 0", 0, 
super.table.ops().current().lastSequenceNumber());
+    V2Assert.assertEquals(
+        "Last sequence number should be 2", 2, 
super.table.ops().current().lastSequenceNumber());
+
+    Snapshot committedAfter = latestSnapshot(super.table, branch);
+    Assert.assertEquals(
+        "Should contain 1 merged manifest for second write",
+        1,
+        committedAfter.allManifests(super.table.io()).size());
+    ManifestFile newManifest = 
committedAfter.allManifests(super.table.io()).get(0);
+    Assert.assertNotEquals(
+        "Should not contain manifest from initial write", initialManifest, 
newManifest);
+
+    long snapshotId = committedAfter.snapshotId();
+
+    validateManifest(
+        newManifest,
+        dataSeqs(2L, 2L, 1L, 1L),
+        fileSeqs(2L, 2L, 1L, 1L),
+        ids(snapshotId, snapshotId, snapshotId, baseId),
+        concat(files(FILE_A, FILE_B), files(initialManifest)), // FILE_A, 
FILE_B, FILE_C, FILE_D
+        statuses(Status.ADDED, Status.ADDED, Status.DELETED, 
Status.EXISTING)); // FILE_C - DELETED
+  }

Review Comment:
   Can we add more testcase which throws exception when delete manifest is 
added etc?



##########
api/src/main/java/org/apache/iceberg/OverwriteFiles.java:
##########
@@ -154,4 +154,14 @@ public interface OverwriteFiles extends 
SnapshotUpdate<OverwriteFiles> {
    * @return this for method chaining
    */
   OverwriteFiles validateNoConflictingDeletes();
+  /**
+   * Append a {@link ManifestFile} to the table. Similar to {@link
+   * AppendFiles#appendManifest(ManifestFile)} Having this with {@link 
OverwriteFiles} too, helps to
+   * Use manifestFile with data files. It will put less pressure on memory in 
case of thousands of
+   * data files together.
+   *
+   * @param file a manifest file
+   * @return this for method chaining
+   */

Review Comment:
   I am also thinking we should call it as `appendDataManifest`, since it 
doesn't support delete manifests. 



##########
api/src/main/java/org/apache/iceberg/OverwriteFiles.java:
##########
@@ -154,4 +154,14 @@ public interface OverwriteFiles extends 
SnapshotUpdate<OverwriteFiles> {
    * @return this for method chaining
    */
   OverwriteFiles validateNoConflictingDeletes();
+  /**
+   * Append a {@link ManifestFile} to the table. Similar to {@link
+   * AppendFiles#appendManifest(ManifestFile)} Having this with {@link 
OverwriteFiles} too, helps to
+   * Use manifestFile with data files. It will put less pressure on memory in 
case of thousands of
+   * data files together.
+   *
+   * @param file a manifest file
+   * @return this for method chaining
+   */

Review Comment:
   I think cases and grammar has to be fixed in the above java doc.
   
   How about copy pasting the java doc from `AppendFiles#appendManifest()`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to