fqaiser94 commented on code in PR #9860: URL: https://github.com/apache/iceberg/pull/9860#discussion_r1521453498
########## core/src/test/java/org/apache/iceberg/TestOverwrite.java: ########## @@ -293,23 +332,178 @@ public void testValidatedOverwriteWithAppendOutsideOfDeleteMetrics() { } @Test - public void testValidatedOverwriteWithAppendSuccess() { + public void testValidatedOverwriteWithAppendInsideOfDeleteMetrics() { + // ensure the overwrite results in a merge + table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); + TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base, branch).snapshotId(); + DataFile file10To14WithMetrics = + DataFiles.builder(PARTITION_BY_DATE) + .withPath("/path/to/data-3.parquet") + .withFileSizeInBytes(0) + .withPartitionPath("date=2018-06-09") + .withMetrics( + new Metrics( + 5L, + null, // no column sizes + ImmutableMap.of(1, 5L, 2, 3L), // value count + ImmutableMap.of(1, 0L, 2, 2L), // null count + null, + ImmutableMap.of( + 1, longToBuffer(5L), 3, stringToBuffer("2018-06-09")), // lower bounds + ImmutableMap.of( + 1, longToBuffer(9L), 3, stringToBuffer("2018-06-09")) // upper bounds + )) + .build(); + OverwriteFiles overwrite = table .newOverwrite() - .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 20))) - .addFile(FILE_10_TO_14) // in 2018-06-09 matches and IDs are inside range + .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 10))) + .addFile( + file10To14WithMetrics) // in 2018-06-09 and IDs are inside range based on metrics .validateAddedFilesMatchOverwriteFilter(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) - .isInstanceOf(ValidationException.class) - .hasMessageStartingWith("Cannot append file with rows that do not match filter"); + commit(table, overwrite, branch); - Assert.assertEquals( - "Should not create a new snapshot", baseId, latestSnapshot(base, branch).snapshotId()); + long overwriteId = latestSnapshot(table, branch).snapshotId(); + + assertThat(overwriteId).as("Should create a new snapshot").isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io()).size()) + .as("Table should have one manifest") + .isEqualTo(1); + + validateManifestEntries( + latestSnapshot(table, branch).allManifests(table.io()).get(0), + ids(overwriteId, baseId, overwriteId), + files(FILE_10_TO_14, FILE_0_TO_4, FILE_5_TO_9), + statuses(Status.ADDED, Status.EXISTING, Status.DELETED)); + } + + @Test + public void testShouldFailValidationIfAnyFileIsInAPartitionNotMatchingOverwriteByRowFilter() { + // ensure the overwrite results in a merge + table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); + + table.updateSpec().addField("id").commit(); + PartitionSpec partitionByDateAndId = + PartitionSpec.builderFor(DATE_SCHEMA).withSpecId(1).identity("date").identity("id").build(); + assertThat(table.spec()) + .as("New spec should be partitioned by date and id.") + .isEqualTo(partitionByDateAndId); + + TableMetadata base = TestTables.readMetadata(TABLE_NAME); + long baseId = + latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base, branch).snapshotId(); + + String overwriteDate = "2018-06-09"; + String notOverwriteDate = "2018-06-10"; + + ImmutableList.of( Review Comment: I agree but unfortunately, I'm not seeing any way to combine `@RunWith(Parameterized.class)` with `@ParmaterizedTest`. The two seem to clash :/ <details> <summary>Code</summary> ``` @ParameterizedTest @CsvSource({"2018-06-09,2018-06-10", "2018-06-10,2018-06-09"}) public void testShouldFailValidationIfAnyFileIsInAPartitionNotMatchingOverwriteByRowFilter( String fileOldSpecDatePartition, String fileNewSpecDatePartition) { String overwriteDate = "2018-06-09"; // ensure the overwrite results in a merge table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); table.updateSpec().addField("id").commit(); PartitionSpec partitionByDateAndId = PartitionSpec.builderFor(DATE_SCHEMA).withSpecId(1).identity("date").identity("id").build(); assertThat(table.spec()) .as("New spec should be partitioned by date and id.") .isEqualTo(partitionByDateAndId); TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base, branch).snapshotId(); DataFile fileOldSpec = DataFiles.builder(PARTITION_BY_DATE) .withPath("/path/to/data-1.parquet") .withPartitionPath(String.format("date=%s", fileOldSpecDatePartition)) .withFileSizeInBytes(0) .withRecordCount(100) .build(); DataFile fileNewSpec = DataFiles.builder(partitionByDateAndId) .withPath("/path/to/data-2.parquet") .withPartitionPath(String.format("date=%s/id=10", fileNewSpecDatePartition)) .withFileSizeInBytes(0) .withRecordCount(100) .build(); assertThatThrownBy( () -> commit( table, table .newOverwrite() .overwriteByRowFilter(equal("date", overwriteDate)) .addFile(fileOldSpec) .addFile(fileNewSpec) .validateAddedFilesMatchOverwriteFilter(), branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot append file with rows that do not match filter"); assertThat(latestSnapshot(table, branch).snapshotId()) .as("Should not create a new snapshot") .isEqualTo(baseId); } ``` </details> Error: ``` org.junit.jupiter.api.extension.ParameterResolutionException: No ParameterResolver registered for parameter [int arg0] in constructor [public org.apache.iceberg.TestOverwrite(int,java.lang.String)] ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org