nastra commented on code in PR #9724: URL: https://github.com/apache/iceberg/pull/9724#discussion_r1499380088
########## spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java: ########## @@ -334,6 +344,208 @@ public void testBinPackWithDeletes() { assertThat(actualRecords).as("7 rows are removed").hasSize(total - 7); } + @Test + public void testRemoveDanglingEqualityDeletesDropped() { + Table table = + TABLES.create( + SCHEMA, + SPEC, + Collections.singletonMap(TableProperties.FORMAT_VERSION, "2"), + tableLocation); + // write partition 1 data sequence 1 + List<ThreeColumnRecord> records1 = + Lists.newArrayList( + new ThreeColumnRecord(1, null, "AAAA"), new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB")); + writeRecords(records1); + + // write partition 2 data sequence 2 + List<ThreeColumnRecord> records2 = + Lists.newArrayList( + new ThreeColumnRecord(1, "CCCCCCCCCC", "CCCC"), + new ThreeColumnRecord(1, "DDDDDDDDDD", "DDDD")); + writeRecords(records2); + shouldHaveFiles(table, 4); + + // write equality deletes sequence 3 + writeEqDeleteRecord(table, "c1", 1, "c3", "AAAA"); + + List<Object[]> expectedRecords = currentData(); + shouldHaveSnapshots(table, 3); + + Result rewriteResult = + actions() + .rewriteDataFiles(table) + .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .option( + RewriteDataFiles.REMOVE_DANGLING_DELETES, + RemoveDanglingDeletesMode.METADATA.modeName()) + .execute(); + + assertThat(rewriteResult) + .extracting( + Result::addedDataFilesCount, + Result::rewrittenDataFilesCount, + Result::removedDeleteFilesCount) + .as("Should compact 4 data files and 1 delete files into 1 result data file") + .containsExactly(1, 4, 1); + shouldHaveMinSequenceNumberInPartition(table, "data_file.partition.c1 == 1", 3); + + shouldHaveSnapshots(table, 5); + assertThat(table.currentSnapshot().summary().get("total-equality-deletes")) + .isEqualTo("0") + .as("Expect no equality delete left in other partition"); Review Comment: `.as(..)` needs to come before the final assertion (here `.isEqualTo("0")`), otherwise the description will be ignored -- This is an automated message 
from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org