danielcweeks commented on code in PR #11481:
URL: https://github.com/apache/iceberg/pull/11481#discussion_r1831797637


##########
data/src/test/java/org/apache/iceberg/io/TestDVWriters.java:
##########
@@ -100,6 +114,211 @@ public void testBasicDVs() throws IOException {
         .contains(dataFile1.location())
         .contains(dataFile2.location());
     assertThat(result.referencesDataFiles()).isTrue();
+
+    // commit the deletes
+    commit(result);
+
+    // verify correctness
+    assertRows(ImmutableList.of(toRow(11, "aaa"), toRow(12, "aaa")));
+  }
+
  // Verifies that writing a second DV for a data file that already has one
  // merges the previous deleted positions and replaces (rewrites) the old DV,
  // rather than stacking a second delete file. Requires format v3 (DV support).
  @TestTemplate
  public void testRewriteDVs() throws IOException {
    assumeThat(formatVersion).isGreaterThanOrEqualTo(3);

    FileWriterFactory<T> writerFactory = newWriterFactory(table.schema());

    // add a data file with 3 data records
    List<T> rows = ImmutableList.of(toRow(1, "aaa"), toRow(2, "aaa"), toRow(3, "aaa"));
    DataFile dataFile = writeData(writerFactory, parquetFileFactory, rows, table.spec(), null);
    table.newFastAppend().appendFile(dataFile).commit();

    // write the first DV (no previous deletes, so the loader map is empty)
    DVFileWriter dvWriter1 =
        new BaseDVFileWriter(fileFactory, new PreviousDeleteLoader(table, ImmutableMap.of()));
    dvWriter1.delete(dataFile.location(), 1L, table.spec(), null);
    dvWriter1.close();

    // validate the writer result: one new DV, no rewritten delete files yet
    DeleteWriteResult result1 = dvWriter1.result();
    assertThat(result1.deleteFiles()).hasSize(1);
    assertThat(result1.referencedDataFiles()).containsOnly(dataFile.location());
    assertThat(result1.referencesDataFiles()).isTrue();
    assertThat(result1.rewrittenDeleteFiles()).isEmpty();

    // commit the first DV
    commit(result1);
    assertThat(table.currentSnapshot().addedDeleteFiles(table.io())).hasSize(1);
    assertThat(table.currentSnapshot().removedDeleteFiles(table.io())).isEmpty();

    // verify correctness after committing the first DV (position 1 is gone)
    assertRows(ImmutableList.of(toRow(1, "aaa"), toRow(3, "aaa")));

    // write the second DV, merging with the first one via the previous-delete loader
    DeleteFile dv1 = Iterables.getOnlyElement(result1.deleteFiles());
    DVFileWriter dvWriter2 =
        new BaseDVFileWriter(
            fileFactory,
            new PreviousDeleteLoader(table, ImmutableMap.of(dataFile.location(), dv1)));
    dvWriter2.delete(dataFile.location(), 2L, table.spec(), null);
    dvWriter2.close();

    // validate the writer result: the new DV must report the old DV as rewritten
    DeleteWriteResult result2 = dvWriter2.result();
    assertThat(result2.deleteFiles()).hasSize(1);
    assertThat(result2.referencedDataFiles()).containsOnly(dataFile.location());
    assertThat(result2.referencesDataFiles()).isTrue();
    assertThat(result2.rewrittenDeleteFiles()).hasSize(1);

    // replace DVs: the commit must both add the merged DV and remove the old one
    commit(result2);
    assertThat(table.currentSnapshot().addedDeleteFiles(table.io())).hasSize(1);
    assertThat(table.currentSnapshot().removedDeleteFiles(table.io())).hasSize(1);

    // verify correctness after replacing DVs (positions 1 and 2 are both gone)
    assertRows(ImmutableList.of(toRow(1, "aaa")));
  }
+
  // Verifies the v2 -> v3 upgrade path: a file-scoped position delete file
  // written under format v2 is merged into, and replaced by, a DV written
  // after the table is upgraded to format v3.
  @TestTemplate
  public void testRewriteFileScopedPositionDeletes() throws IOException {
    assumeThat(formatVersion).isEqualTo(2);

    FileWriterFactory<T> writerFactory = newWriterFactory(table.schema());

    // add a data file with 3 records
    List<T> rows = ImmutableList.of(toRow(1, "aaa"), toRow(2, "aaa"), toRow(3, "aaa"));
    DataFile dataFile = writeData(writerFactory, parquetFileFactory, rows, table.spec(), null);
    table.newFastAppend().appendFile(dataFile).commit();

    // add a file-scoped position delete file (deletes position 0)
    DeleteFile deleteFile =
        writePositionDeletes(writerFactory, ImmutableList.of(Pair.of(dataFile.location(), 0L)));
    table.newRowDelta().addDeletes(deleteFile).commit();

    // verify correctness after adding the file-scoped position delete
    assertRows(ImmutableList.of(toRow(2, "aaa"), toRow(3, "aaa")));

    // upgrade the table to V3 to enable DVs
    table.updateProperties().set(TableProperties.FORMAT_VERSION, "3").commit();

    // write a DV, merging with the file-scoped position delete
    DVFileWriter dvWriter =
        new BaseDVFileWriter(
            fileFactory,
            new PreviousDeleteLoader(table, ImmutableMap.of(dataFile.location(), deleteFile)));
    dvWriter.delete(dataFile.location(), 1L, table.spec(), null);
    dvWriter.close();

    // validate the writer result: the old position delete file is reported as rewritten
    DeleteWriteResult result = dvWriter.result();
    assertThat(result.deleteFiles()).hasSize(1);
    assertThat(result.referencedDataFiles()).containsOnly(dataFile.location());
    assertThat(result.referencesDataFiles()).isTrue();
    assertThat(result.rewrittenDeleteFiles()).hasSize(1);

    // replace the position delete file with the DV in one commit
    commit(result);
    assertThat(table.currentSnapshot().addedDeleteFiles(table.io())).hasSize(1);
    assertThat(table.currentSnapshot().removedDeleteFiles(table.io())).hasSize(1);

    // verify correctness (positions 0 and 1 are both gone)
    assertRows(ImmutableList.of(toRow(3, "aaa")));
  }
+
+  @TestTemplate
+  public void testApplyPartitionScopedPositionDeletes() throws IOException {
+    assumeThat(formatVersion).isEqualTo(2);
+
+    FileWriterFactory<T> writerFactory = newWriterFactory(table.schema());
+
+    // add the first data file with 3 records
+    List<T> rows1 = ImmutableList.of(toRow(1, "aaa"), toRow(2, "aaa"), 
toRow(3, "aaa"));
+    DataFile dataFile1 = writeData(writerFactory, parquetFileFactory, rows1, 
table.spec(), null);
+    table.newFastAppend().appendFile(dataFile1).commit();
+
+    // add the second data file with 3 records
+    List<T> rows2 = ImmutableList.of(toRow(4, "aaa"), toRow(5, "aaa"), 
toRow(6, "aaa"));
+    DataFile dataFile2 = writeData(writerFactory, parquetFileFactory, rows2, 
table.spec(), null);
+    table.newFastAppend().appendFile(dataFile2).commit();
+
+    // add a position delete file with deletes for both data files
+    DeleteFile deleteFile =
+        writePositionDeletes(
+            writerFactory,
+            ImmutableList.of(
+                Pair.of(dataFile1.location(), 0L),
+                Pair.of(dataFile1.location(), 1L),
+                Pair.of(dataFile2.location(), 0L)));
+    table.newRowDelta().addDeletes(deleteFile).commit();
+
+    // verify correctness with the position delete file
+    assertRows(ImmutableList.of(toRow(3, "aaa"), toRow(5, "aaa"), toRow(6, 
"aaa")));
+
+    // upgrade the table to V3 to enable DVs
+    table.updateProperties().set(TableProperties.FORMAT_VERSION, "3").commit();
+
+    // write a DV, applying old positions but keeping the position delete file 
in place
+    DVFileWriter dvWriter =
+        new BaseDVFileWriter(
+            fileFactory,
+            new PreviousDeleteLoader(table, 
ImmutableMap.of(dataFile2.location(), deleteFile)));
+    dvWriter.delete(dataFile2.location(), 1L, table.spec(), null);
+    dvWriter.close();
+
+    // validate the writer result
+    DeleteWriteResult result = dvWriter.result();
+    assertThat(result.deleteFiles()).hasSize(1);
+    
assertThat(result.referencedDataFiles()).containsOnly(dataFile2.location());
+    assertThat(result.referencesDataFiles()).isTrue();
+    assertThat(result.rewrittenDeleteFiles()).isEmpty();
+    DeleteFile dv = Iterables.getOnlyElement(result.deleteFiles());
+
+    // commit the DV, ensuring the position delete file remains
+    commit(result);
+    
assertThat(table.currentSnapshot().addedDeleteFiles(table.io())).hasSize(1);
+    
assertThat(table.currentSnapshot().removedDeleteFiles(table.io())).isEmpty();
+
+    // verify correctness with DVs and position delete files
+    assertRows(ImmutableList.of(toRow(3, "aaa"), toRow(6, "aaa")));
+
+    // verify the position delete file applies only to the data file without 
the DV

Review Comment:
   Love the context comments.  This makes it very easy to follow what's going 
on in the test. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org

Reply via email to