RussellSpitzer commented on code in PR #9803: URL: https://github.com/apache/iceberg/pull/9803#discussion_r1508069068
########## spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java: ########## @@ -1463,6 +1465,148 @@ public void testSnapshotProperty() { assertThat(table.currentSnapshot().summary()).containsKeys(commitMetricsKeys); } + @Test + public void testBinPackRewriterWithSpecificUnparitionedOutputSpec() { + Table table = createTable(10); + shouldHaveFiles(table, 10); + // to be used for rewrite + int outputSpecId = table.spec().specId(); + // create multiple partition specs with different commit + table.updateSpec().addField(Expressions.truncate("c2", 2)).commit(); + + long dataSizeBefore = testDataSize(table); + long count = currentData().size(); + + RewriteDataFiles.Result result = + basicRewrite(table) + .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) + .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .binPack() + .execute(); + + assertThat(result.rewrittenBytesCount()).isEqualTo(dataSizeBefore); + assertThat(currentData().size()).isEqualTo(count); + shouldRewriteDataFilesWithPartitionSpec(table, outputSpecId); + } + + @Test + public void testBinPackRewriterWithSpecificOutputSpec() { + Table table = createTable(10); + shouldHaveFiles(table, 10); + // to be used for rewrite back to original un-partitioned spec + int outputSpecId = table.spec().specId(); + // create multiple partition specs with different commit + table.updateSpec().addField(Expressions.truncate("c2", 2)).commit(); + table.updateSpec().addField(Expressions.bucket("c3", 2)).commit(); + + long dataSizeBefore = testDataSize(table); + long count = currentData().size(); + + RewriteDataFiles.Result result = + basicRewrite(table) + .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(outputSpecId)) + .option(SizeBasedFileRewriter.REWRITE_ALL, "true") + .binPack() + .execute(); + + assertThat(result.rewrittenBytesCount()).isEqualTo(dataSizeBefore); + assertThat(currentData().size()).isEqualTo(count); + shouldRewriteDataFilesWithPartitionSpec(table, outputSpecId); + } + + @Test + public void testBinpackRewriteWithInvalidOutputSpecId() { + Table table = createTable(10); + shouldHaveFiles(table, 10); + int previousSpecId = table.spec().specId(); + // simulate multiple partition specs with different commit + table.updateSpec().addField(Expressions.truncate("c2", 2)).commit(); + table.updateSpec().addField(Expressions.bucket("c3", 2)).commit(); + Assertions.assertThatThrownBy( + () -> + actions() + .rewriteDataFiles(table) + .option(RewriteDataFiles.OUTPUT_SPEC_ID, String.valueOf(1234)) + .binPack() + .execute()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Cannot use output spec id 1234 because the table does not contain a reference to this spec-id."); + } + + @Test + public void testSortRewriterWithSpecificOutputSpecId() { + Table table = createTable(10); + shouldHaveFiles(table, 10); + Integer previousSpecId = table.spec().specId(); + // simulate multiple partition specs with different commit + table.updateSpec().addField(Expressions.truncate("c2", 2)).commit(); + // to be used for rewrite Review Comment: I'm not sure this comment adds anything, the variable name makes it clear what you are saving this for -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org