dramaticlly commented on code in PR #10935:
URL: https://github.com/apache/iceberg/pull/10935#discussion_r1765491165


##########
core/src/test/java/org/apache/iceberg/TestBaseIncrementalChangelogScan.java:
##########
@@ -132,6 +131,175 @@ public void testFileDeletes() {
     assertThat(t1.existingDeletes()).as("Must be no deletes").isEmpty();
   }
 
+  @TestTemplate
+  public void testRowDeletes() {
+    assumeThat(formatVersion).isEqualTo(2);
+
+    table
+        .newFastAppend()
+        .appendFile(FILE_A)
+        .appendFile(FILE_A2)
+        .appendFile(FILE_B)
+        .appendFile(FILE_C)
+        .commit();
+    Snapshot snap1 = table.currentSnapshot();
+
+    // position delete
+    table.newRowDelta().addDeletes(FILE_B_DELETES).commit();
+    Snapshot snap2 = table.currentSnapshot();
+
+    // equality delete
+    table.newRowDelta().addDeletes(FILE_C2_DELETES).commit();
+    Snapshot snap3 = table.currentSnapshot();
+
+    // mix of position and equality deletes
+    
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
+    Snapshot snap4 = table.currentSnapshot();
+
+    IncrementalChangelogScan scan =
+        
newScan().fromSnapshotExclusive(snap1.snapshotId()).toSnapshot(snap4.snapshotId());
+
+    List<ChangelogScanTask> tasks = plan(scan);
+
+    assertThat(tasks).as("Must have 4 tasks").hasSize(4);
+
+    DeletedRowsScanTask t1 = (DeletedRowsScanTask) tasks.get(0);
+    assertThat(t1.changeOrdinal()).as("Ordinal must match").isEqualTo(0);
+    assertThat(t1.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap2.snapshotId());
+    assertThat(t1.file().path()).as("Data file must 
match").isEqualTo(FILE_B.path());
+    assertThat(t1.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_B_DELETES.path());
+    assertThat(t1.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t2 = (DeletedRowsScanTask) tasks.get(1);
+    assertThat(t2.changeOrdinal()).as("Ordinal must match").isEqualTo(1);
+    assertThat(t2.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap3.snapshotId());
+    assertThat(t2.file().path()).as("Data file must 
match").isEqualTo(FILE_C.path());
+    assertThat(t2.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_C2_DELETES.path());
+    assertThat(t2.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t3 = (DeletedRowsScanTask) tasks.get(2);
+    assertThat(t3.changeOrdinal()).as("Ordinal must match").isEqualTo(2);
+    assertThat(t3.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap4.snapshotId());
+    assertThat(t3.file().path()).as("Data file must 
match").isEqualTo(FILE_A2.path());
+    assertThat(t3.addedDeletes().size()).as("Number of added delete files must 
match").isEqualTo(2);
+    assertThat(t3.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A2_DELETES.path());
+    assertThat(t3.addedDeletes().get(1).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A_DELETES.path());
+    assertThat(t3.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t4 = (DeletedRowsScanTask) tasks.get(3);
+    assertThat(t4.changeOrdinal()).as("Ordinal must match").isEqualTo(2);
+    assertThat(t4.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap4.snapshotId());
+    assertThat(t4.file().path()).as("Data file must 
match").isEqualTo(FILE_A.path());
+    assertThat(t4.addedDeletes().size()).as("Number of added delete files must 
match").isEqualTo(2);
+    assertThat(t4.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A2_DELETES.path());
+    assertThat(t4.addedDeletes().get(1).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A_DELETES.path());
+    assertThat(t4.existingDeletes()).as("Must be no existing 
deletes").isEmpty();

Review Comment:
   I believe this is the core of the test I am looking for! I think there's 
some feature we can leverage in assertJ so I took the stub to organize this in 
a little different way.
   
   I do notice that the we use the file path instead of object itself for 
asserting equality on data and delete files,  probably because once added to 
the table, its data and file sequence number are inherited from manifest entry. 
I think we can probably reuse this `FILE_COMPARISON_CONFIG` from another class 
[`TestManifestReader`](https://github.com/apache/iceberg/blob/5ea78e3fbe5d8c9c846a1b86bcd2b77d13a31acd/core/src/test/java/org/apache/iceberg/TestManifestReader.java#L40)
   
   ```java
     private static final RecursiveComparisonConfiguration 
FILE_COMPARISON_CONFIG =
         RecursiveComparisonConfiguration.builder()
             .withIgnoredFields(
                 "dataSequenceNumber", "fileOrdinal", "fileSequenceNumber", 
"fromProjectionPos")
             .build();
   ```
   
   complete code 
   - group assertion around changelogScanTask
   - ensure all are actually instance of DeletedRowsScanTask and assert on its 
data and existing/added delete files
   - leverage FILE_COMPARISON_CONFIG for asserting equality
   ```java
       assertThat(tasks)
           .extracting(ChangelogScanTask::changeOrdinal)
           .as("Ordinal must match")
           .containsExactly(0, 1, 2, 2);
   
       assertThat(tasks)
           .extracting(ChangelogScanTask::commitSnapshotId)
           .as("Snapshot must match")
           .containsExactly(
               snap2.snapshotId(), snap3.snapshotId(), snap4.snapshotId(), 
snap4.snapshotId());
   
       assertThat(tasks)
           .extracting(ChangelogScanTask::operation)
           .as("Operation must match")
           .containsExactly(
               ChangelogOperation.DELETE,
               ChangelogOperation.DELETE,
               ChangelogOperation.DELETE,
               ChangelogOperation.DELETE);
   
       List<DeletedRowsScanTask> deletedRowsScanTasks =
           tasks.stream()
               .filter(t -> t instanceof DeletedRowsScanTask)
               .map(t -> (DeletedRowsScanTask) t)
               .collect(Collectors.toList());
       assertThat(deletedRowsScanTasks).as("Must have 4 
DeletedRowsScanTask").hasSize(4);
       assertThat(deletedRowsScanTasks)
           .extracting(DeletedRowsScanTask::existingDeletes)
           .as("Existing deletes shall be empty")
           .allMatch(List::isEmpty);
   
       DeletedRowsScanTask t1 = deletedRowsScanTasks.get(0);
       assertThat(t1.file())
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Data file must match")
           .isEqualTo(FILE_B);
       assertThat(t1.addedDeletes().get(0))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_B_DELETES);
   
       DeletedRowsScanTask t2 = deletedRowsScanTasks.get(1);
       assertThat(t2.file())
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Data file must match")
           .isEqualTo(FILE_C);
       assertThat(t2.addedDeletes().get(0))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_C2_DELETES);
   
       DeletedRowsScanTask t3 = deletedRowsScanTasks.get(2);
       assertThat(t3.file())
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Data file must match")
           .isEqualTo(FILE_A2);
       assertThat(t3.addedDeletes()).as("Number of added delete files must 
match").hasSize(2);
       assertThat(t3.addedDeletes().get(0))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_A2_DELETES);
       assertThat(t3.addedDeletes().get(1))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_A_DELETES);
   
       DeletedRowsScanTask t4 = deletedRowsScanTasks.get(3);
       assertThat(t4.file())
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Data file must match")
           .isEqualTo(FILE_A);
       assertThat(t4.addedDeletes()).as("Number of added delete files must 
match").hasSize(2);
       assertThat(t4.addedDeletes().get(0))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_A2_DELETES);
       assertThat(t4.addedDeletes().get(1))
           .usingRecursiveComparison(FILE_COMPARISON_CONFIG)
           .as("Added delete file must match")
           .isEqualTo(FILE_A_DELETES);
   ```



##########
core/src/test/java/org/apache/iceberg/TestBaseIncrementalChangelogScan.java:
##########
@@ -132,6 +131,175 @@ public void testFileDeletes() {
     assertThat(t1.existingDeletes()).as("Must be no deletes").isEmpty();
   }
 
+  @TestTemplate
+  public void testRowDeletes() {
+    assumeThat(formatVersion).isEqualTo(2);
+
+    table
+        .newFastAppend()
+        .appendFile(FILE_A)
+        .appendFile(FILE_A2)
+        .appendFile(FILE_B)
+        .appendFile(FILE_C)
+        .commit();
+    Snapshot snap1 = table.currentSnapshot();
+
+    // position delete
+    table.newRowDelta().addDeletes(FILE_B_DELETES).commit();
+    Snapshot snap2 = table.currentSnapshot();
+
+    // equality delete
+    table.newRowDelta().addDeletes(FILE_C2_DELETES).commit();
+    Snapshot snap3 = table.currentSnapshot();
+
+    // mix of position and equality deletes
+    
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
+    Snapshot snap4 = table.currentSnapshot();
+
+    IncrementalChangelogScan scan =
+        
newScan().fromSnapshotExclusive(snap1.snapshotId()).toSnapshot(snap4.snapshotId());
+
+    List<ChangelogScanTask> tasks = plan(scan);
+
+    assertThat(tasks).as("Must have 4 tasks").hasSize(4);
+
+    DeletedRowsScanTask t1 = (DeletedRowsScanTask) tasks.get(0);
+    assertThat(t1.changeOrdinal()).as("Ordinal must match").isEqualTo(0);
+    assertThat(t1.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap2.snapshotId());
+    assertThat(t1.file().path()).as("Data file must 
match").isEqualTo(FILE_B.path());
+    assertThat(t1.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_B_DELETES.path());
+    assertThat(t1.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t2 = (DeletedRowsScanTask) tasks.get(1);
+    assertThat(t2.changeOrdinal()).as("Ordinal must match").isEqualTo(1);
+    assertThat(t2.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap3.snapshotId());
+    assertThat(t2.file().path()).as("Data file must 
match").isEqualTo(FILE_C.path());
+    assertThat(t2.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_C2_DELETES.path());
+    assertThat(t2.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t3 = (DeletedRowsScanTask) tasks.get(2);
+    assertThat(t3.changeOrdinal()).as("Ordinal must match").isEqualTo(2);
+    assertThat(t3.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap4.snapshotId());
+    assertThat(t3.file().path()).as("Data file must 
match").isEqualTo(FILE_A2.path());
+    assertThat(t3.addedDeletes().size()).as("Number of added delete files must 
match").isEqualTo(2);
+    assertThat(t3.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A2_DELETES.path());
+    assertThat(t3.addedDeletes().get(1).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A_DELETES.path());
+    assertThat(t3.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+
+    DeletedRowsScanTask t4 = (DeletedRowsScanTask) tasks.get(3);
+    assertThat(t4.changeOrdinal()).as("Ordinal must match").isEqualTo(2);
+    assertThat(t4.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap4.snapshotId());
+    assertThat(t4.file().path()).as("Data file must 
match").isEqualTo(FILE_A.path());
+    assertThat(t4.addedDeletes().size()).as("Number of added delete files must 
match").isEqualTo(2);
+    assertThat(t4.addedDeletes().get(0).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A2_DELETES.path());
+    assertThat(t4.addedDeletes().get(1).path())
+        .as("Added delete file must match")
+        .isEqualTo(FILE_A_DELETES.path());
+    assertThat(t4.existingDeletes()).as("Must be no existing 
deletes").isEmpty();
+  }
+
+  @TestTemplate
+  public void testAddingAndDeletingInSameCommit() {
+    assumeThat(formatVersion).isEqualTo(2);
+
+    table.newFastAppend().appendFile(FILE_A).commit();
+    Snapshot snap1 = table.currentSnapshot();
+
+    table.newRowDelta().addRows(FILE_B).addDeletes(FILE_B_DELETES).commit();
+    Snapshot snap2 = table.currentSnapshot();
+
+    IncrementalChangelogScan scan =
+        
newScan().fromSnapshotExclusive(snap1.snapshotId()).toSnapshot(snap2.snapshotId());
+
+    List<ChangelogScanTask> tasks = plan(scan);
+
+    assertThat(tasks).as("Must have 1 tasks").hasSize(1);
+
+    AddedRowsScanTask t1 = (AddedRowsScanTask) tasks.get(0);
+    assertThat(t1.changeOrdinal()).as("Ordinal must match").isEqualTo(0);
+    assertThat(t1.commitSnapshotId()).as("Snapshot must 
match").isEqualTo(snap2.snapshotId());
+    assertThat(t1.file().path()).as("Data file must 
match").isEqualTo(FILE_B.path());
+    assertThat(t1.deletes().get(0).path())
+        .as("Delete file must match")
+        .isEqualTo(FILE_B_DELETES.path());
+  }
+
+  @TestTemplate
+  public void testDeletingRowsInDataFileWithExistingDeletes() {

Review Comment:
   love this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to