nastra commented on code in PR #10755: URL: https://github.com/apache/iceberg/pull/10755#discussion_r1852353595
########## api/src/main/java/org/apache/iceberg/ExpireSnapshots.java: ########## @@ -118,4 +118,16 @@ public interface ExpireSnapshots extends PendingUpdate<List<Snapshot>> { * @return this for method chaining */ ExpireSnapshots cleanExpiredFiles(boolean clean); + + /** + * Allows expiration of unreachable metadata, such as partition specs as part of the operation. + * + * @param clean setting this to true will remove metadata(such as partition spec) that are no + * longer reachable by any snapshot Review Comment: ```suggestion * @param clean setting this to true will remove metadata (such as partition spec) that is no * longer reachable by any snapshot ``` ########## core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java: ########## @@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() { .hasMessage("Requirement failed: default partition spec changed: expected id 0 != 1"); } + @Test + public void testRemovePartitionSpec() { + int defaultSpecId = 3; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + // empty refs + when(metadata.refs()).thenReturn(ImmutableMap.of()); + + List<UpdateRequirement> requirements = + UpdateRequirements.forUpdateTable( + metadata, + ImmutableList.of(new MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2)))); + + assertThat(requirements) + .hasSize(2) + .hasOnlyElementsOfTypes( + UpdateRequirement.AssertTableUUID.class, UpdateRequirement.AssertDefaultSpecID.class); + + assertTableUUID(requirements); + + assertThat(requirements) + .element(1) + .asInstanceOf(InstanceOfAssertFactories.type(UpdateRequirement.AssertDefaultSpecID.class)) + .extracting(UpdateRequirement.AssertDefaultSpecID::specId) + .isEqualTo(defaultSpecId); + } + + @Test + public void testRemovePartitionSpecsWithRefs() { + int defaultSpecId = 3; + long snapshotId = 42L; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + + String branch = "branch"; + SnapshotRef snapshotRef = mock(SnapshotRef.class); + when(snapshotRef.snapshotId()).thenReturn(snapshotId); + when(snapshotRef.isBranch()).thenReturn(true); + when(metadata.refs()).thenReturn(ImmutableMap.of(branch, snapshotRef)); + + List<UpdateRequirement> requirements = + UpdateRequirements.forUpdateTable( + metadata, + ImmutableList.of(new MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2)))); + + assertThat(requirements) Review Comment: this is also missing a call to `requirements.forEach(req -> req.validate(metadata));` before this line ########## core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java: ########## @@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() { .hasMessage("Requirement failed: default partition spec changed: expected id 0 != 1"); } + @Test + public void testRemovePartitionSpec() { Review Comment: ```suggestion public void removePartitionSpec() { ``` test methods in this class don't have a `test` prefix, so would be good to keep it that way in this test class ########## core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java: ########## @@ -1284,6 +1284,34 @@ public void testUpdateTableSpecThenRevert() { assertThat(table.spec()).as("Loaded table should have expected spec").isEqualTo(TABLE_SPEC); } + @Test + public void testRemoveUnusedSpec() { + C catalog = catalog(); + + if (requiresNamespaceCreate()) { + catalog.createNamespace(NS); + } + + Table table = + catalog + .buildTable(TABLE, SCHEMA) + .withPartitionSpec(SPEC) + .withProperty(TableProperties.GC_ENABLED, "true") + .create(); + PartitionSpec spec = table.spec(); + // added some file to trigger expire snapshot Review Comment: ```suggestion // add a file to trigger snapshot expiration ``` ########## core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java: ########## @@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() { .hasMessage("Requirement failed: default partition spec changed: expected id 0 != 1"); } + @Test + public void testRemovePartitionSpec() { + int defaultSpecId = 3; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + // empty refs + when(metadata.refs()).thenReturn(ImmutableMap.of()); + + List<UpdateRequirement> requirements = + UpdateRequirements.forUpdateTable( + metadata, + ImmutableList.of(new MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2)))); + + assertThat(requirements) Review Comment: this is missing a call to `requirements.forEach(req -> req.validate(metadata));` before this line ########## core/src/main/java/org/apache/iceberg/TableMetadata.java: ########## @@ -1108,6 +1108,24 @@ public Builder setDefaultPartitionSpec(int specId) { return this; } + Builder removeUnusedSpecs(Iterable<Integer> specIds) { + Set<Integer> specIdsToRemove = Sets.newHashSet(); + for (Integer specId : specIds) { + Preconditions.checkArgument( + specId != defaultSpecId, "Cannot remove default partition spec"); + PartitionSpec toBeRemoved = specsById.remove(specId); + if (toBeRemoved != null) { + specIdsToRemove.add(specId); + } + } Review Comment: nit: newline after } ########## core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java: ########## @@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() { .hasMessage("Requirement failed: default partition spec changed: expected id 0 != 1"); } + @Test + public void testRemovePartitionSpec() { + int defaultSpecId = 3; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + // empty refs + when(metadata.refs()).thenReturn(ImmutableMap.of()); Review Comment: this isn't being needed here ########## core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java: ########## @@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() { .hasMessage("Requirement failed: default partition spec changed: expected id 0 != 1"); } + @Test + public void testRemovePartitionSpec() { + int defaultSpecId = 3; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + // empty refs + when(metadata.refs()).thenReturn(ImmutableMap.of()); + + List<UpdateRequirement> requirements = + UpdateRequirements.forUpdateTable( + metadata, + ImmutableList.of(new MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2)))); + + assertThat(requirements) + .hasSize(2) + .hasOnlyElementsOfTypes( + UpdateRequirement.AssertTableUUID.class, UpdateRequirement.AssertDefaultSpecID.class); + + assertTableUUID(requirements); + + assertThat(requirements) + .element(1) + .asInstanceOf(InstanceOfAssertFactories.type(UpdateRequirement.AssertDefaultSpecID.class)) + .extracting(UpdateRequirement.AssertDefaultSpecID::specId) + .isEqualTo(defaultSpecId); + } + + @Test + public void testRemovePartitionSpecsWithRefs() { + int defaultSpecId = 3; + long snapshotId = 42L; + when(metadata.defaultSpecId()).thenReturn(defaultSpecId); + + String branch = "branch"; + SnapshotRef snapshotRef = mock(SnapshotRef.class); + when(snapshotRef.snapshotId()).thenReturn(snapshotId); + when(snapshotRef.isBranch()).thenReturn(true); + when(metadata.refs()).thenReturn(ImmutableMap.of(branch, snapshotRef)); Review Comment: this also requires mocking this: `when(metadata.ref(branch)).thenReturn(snapshotRef);` (it wasn't failing because the call to `validate()` was missing ########## core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java: ########## @@ -1284,6 +1284,34 @@ public void testUpdateTableSpecThenRevert() { assertThat(table.spec()).as("Loaded table should have expected spec").isEqualTo(TABLE_SPEC); } + @Test + public void testRemoveUnusedSpec() { Review Comment: I think it would make sense to also add a test with a branch (in order to make sure that a spec used on some branch isn't accidentally removed) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org