amogh-jahagirdar commented on code in PR #10755: URL: https://github.com/apache/iceberg/pull/10755#discussion_r1904445943
########## core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java: ########## @@ -1284,6 +1284,62 @@ public void testUpdateTableSpecThenRevert() { assertThat(table.spec()).as("Loaded table should have expected spec").isEqualTo(TABLE_SPEC); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRemoveUnusedSpec(boolean withBranch) { + String branch = "test"; + C catalog = catalog(); + + if (requiresNamespaceCreate()) { + catalog.createNamespace(NS); + } + + Table table = + catalog + .buildTable(TABLE, SCHEMA) + .withPartitionSpec(SPEC) + .withProperty(TableProperties.GC_ENABLED, "true") + .create(); + PartitionSpec spec = table.spec(); + // added a file to trigger snapshot expiration + table.newFastAppend().appendFile(FILE_A).commit(); + if (withBranch) { + table.manageSnapshots().createBranch(branch).commit(); + } + table.updateSpec().addField(Expressions.bucket("data", 16)).commit(); + table.updateSpec().removeField(Expressions.bucket("data", 16)).commit(); + table.updateSpec().addField("data").commit(); + assertThat(table.specs()).as("Should have 3 total specs").hasSize(3); + PartitionSpec current = table.spec(); + table.expireSnapshots().cleanExpiredMetadata(true).commit(); + + Table loaded = catalog.loadTable(TABLE); + assertThat(loaded.specs().values()).containsExactlyInAnyOrder(spec, current); + + // add a data file with current spec and remove the old data file + table.newDelete().deleteFile(FILE_A).commit(); + DataFile anotherFile = + DataFiles.builder(current) + .withPath("/path/to/data-b.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("id_bucket=0/data=123") // easy way to set partition data for now + .withRecordCount(2) // needs at least one record or else metrics will filter it out Review Comment: Non-blocking nit: Sorry for failing to catch this earlier, but these comments don't seem particularly useful. Could we just remove them? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org