nastra commented on code in PR #10755: URL: https://github.com/apache/iceberg/pull/10755#discussion_r1837955470
########## core/src/test/java/org/apache/iceberg/TestRemoveSnapshots.java: ########## @@ -1620,6 +1621,90 @@ public void testRetainFilesOnRetainedBranches() { assertThat(deletedFiles).isEqualTo(expectedDeletes); } + @TestTemplate + public void testRemoveSpecDuringExpiration() { + DataFile file = + DataFiles.builder(table.spec()) + .withPath("/path/to/data-0.parquet") + .withPartitionPath("data_bucket=0") + .withFileSizeInBytes(10) + .withRecordCount(100) + .build(); + table.newAppend().appendFile(file).commit(); + Snapshot append = table.currentSnapshot(); + String appendManifest = + Iterables.getOnlyElement( + table.currentSnapshot().allManifests(table.io()).stream() + .map(ManifestFile::path) + .collect(Collectors.toList())); + table.newDelete().deleteFile(file).commit(); + Snapshot delete = table.currentSnapshot(); + String deleteManifest = + Iterables.getOnlyElement( + table.currentSnapshot().allManifests(table.io()).stream() + .map(ManifestFile::path) + .collect(Collectors.toList())); + + table.updateSpec().addField("id_bucket", Expressions.bucket("id", 16)).commit(); + PartitionSpec idAndDataBucketSpec = table.spec(); + DataFile bucketFile = + DataFiles.builder(table.spec()) + .withPath("/path/to/data-0-id-0.parquet") + .withFileSizeInBytes(10) + .withRecordCount(100) + .withPartitionPath("data_bucket=0/id_bucket=0") + .build(); + table.newAppend().appendFile(bucketFile).commit(); + + Set<String> deletedFiles = Sets.newHashSet(); + // Expiring snapshots should remove the data_bucket partition + removeSnapshots(table) + .expireOlderThan(System.currentTimeMillis()) + .cleanExpiredMeta(true) + .deleteWith(deletedFiles::add) + .commit(); + + assertThat(deletedFiles) + .containsExactlyInAnyOrder( + appendManifest, + deleteManifest, + file.location(), + append.manifestListLocation(), + delete.manifestListLocation()); + assertThat(Iterables.getOnlyElement(table.specs().keySet())) + .as("Only id_bucket + data_bucket transform should exist") + .isEqualTo(idAndDataBucketSpec.specId()); + } + + @TestTemplate + public void testRemoveSpecsDoesntRemoveDefaultSpec() throws IOException { + // The default spec for table is bucketed on data, but write using unpartitioned + PartitionSpec dataBucketSpec = table.spec(); + DataFile file = + DataFiles.builder(PartitionSpec.unpartitioned()) + .withPath("/path/to/data-0.parquet") + .withFileSizeInBytes(10) + .withRecordCount(100) + .build(); + + table.newAppend().appendFile(file).commit(); + Snapshot append = table.currentSnapshot(); + table.newDelete().deleteFile(file).commit(); + + Set<String> deletedFiles = Sets.newHashSet(); + // Expiring snapshots should remove only the unpartitioned spec + removeSnapshots(table) + .expireOlderThan(System.currentTimeMillis()) + .cleanExpiredMeta(true) + .deleteWith(deletedFiles::add) + .commit(); + + assertThat(deletedFiles).containsExactlyInAnyOrder(append.manifestListLocation()); + assertThat(Iterables.getOnlyElement(table.specs().keySet())) Review Comment: same as above -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org