nastra commented on code in PR #9860:
URL: https://github.com/apache/iceberg/pull/9860#discussion_r1519791560
##########
core/src/test/java/org/apache/iceberg/TestOverwrite.java:
##########
@@ -293,23 +332,178 @@ public void
testValidatedOverwriteWithAppendOutsideOfDeleteMetrics() {
}
@Test
- public void testValidatedOverwriteWithAppendSuccess() {
+ public void testValidatedOverwriteWithAppendInsideOfDeleteMetrics() {
+ // ensure the overwrite results in a merge
+ table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT,
"1").commit();
+
TableMetadata base = TestTables.readMetadata(TABLE_NAME);
long baseId =
latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base,
branch).snapshotId();
+ DataFile file10To14WithMetrics =
+ DataFiles.builder(PARTITION_BY_DATE)
+ .withPath("/path/to/data-3.parquet")
+ .withFileSizeInBytes(0)
+ .withPartitionPath("date=2018-06-09")
+ .withMetrics(
+ new Metrics(
+ 5L,
+ null, // no column sizes
+ ImmutableMap.of(1, 5L, 2, 3L), // value count
+ ImmutableMap.of(1, 0L, 2, 2L), // null count
+ null,
+ ImmutableMap.of(
+ 1, longToBuffer(5L), 3, stringToBuffer("2018-06-09")),
// lower bounds
+ ImmutableMap.of(
+ 1, longToBuffer(9L), 3, stringToBuffer("2018-06-09"))
// upper bounds
+ ))
+ .build();
+
OverwriteFiles overwrite =
table
.newOverwrite()
- .overwriteByRowFilter(and(equal("date", "2018-06-09"),
lessThan("id", 20)))
- .addFile(FILE_10_TO_14) // in 2018-06-09 matches and IDs are
inside range
+ .overwriteByRowFilter(and(equal("date", "2018-06-09"),
lessThan("id", 10)))
+ .addFile(
+ file10To14WithMetrics) // in 2018-06-09 and IDs are inside
range based on metrics
.validateAddedFilesMatchOverwriteFilter();
- Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch))
- .isInstanceOf(ValidationException.class)
- .hasMessageStartingWith("Cannot append file with rows that do not
match filter");
+ commit(table, overwrite, branch);
- Assert.assertEquals(
- "Should not create a new snapshot", baseId, latestSnapshot(base,
branch).snapshotId());
+ long overwriteId = latestSnapshot(table, branch).snapshotId();
+
+ assertThat(overwriteId).as("Should create a new
snapshot").isNotEqualTo(baseId);
+ assertThat(latestSnapshot(table, branch).allManifests(table.io()).size())
+ .as("Table should have one manifest")
+ .isEqualTo(1);
+
+ validateManifestEntries(
+ latestSnapshot(table, branch).allManifests(table.io()).get(0),
+ ids(overwriteId, baseId, overwriteId),
+ files(FILE_10_TO_14, FILE_0_TO_4, FILE_5_TO_9),
+ statuses(Status.ADDED, Status.EXISTING, Status.DELETED));
+ }
+
+ @Test
+ public void
testShouldFailValidationIfAnyFileIsInAPartitionNotMatchingOverwriteByRowFilter()
{
+ // ensure the overwrite results in a merge
+ table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT,
"1").commit();
+
+ table.updateSpec().addField("id").commit();
+ PartitionSpec partitionByDateAndId =
+
PartitionSpec.builderFor(DATE_SCHEMA).withSpecId(1).identity("date").identity("id").build();
+ assertThat(table.spec())
+ .as("New spec should be partitioned by date and id.")
+ .isEqualTo(partitionByDateAndId);
+
+ TableMetadata base = TestTables.readMetadata(TABLE_NAME);
+ long baseId =
+ latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base,
branch).snapshotId();
+
+ String overwriteDate = "2018-06-09";
+ String notOverwriteDate = "2018-06-10";
+
+ ImmutableList.of(
+ ImmutableList.of(notOverwriteDate, overwriteDate),
+ ImmutableList.of(overwriteDate, notOverwriteDate))
+ .forEach(
+ datePartitions -> {
+ String datePartition1 = datePartitions.get(0);
+ String datePartition2 = datePartitions.get(1);
+
+ DataFile fileOldSpec =
+ DataFiles.builder(PARTITION_BY_DATE)
+ .withPath("/path/to/data-1.parquet")
+ .withPartitionPath(String.format("date=%s",
datePartition1))
+ .withFileSizeInBytes(0)
+ .withRecordCount(100)
+ .build();
+
+ DataFile fileNewSpec =
+ DataFiles.builder(partitionByDateAndId)
+ .withPath("/path/to/data-2.parquet")
+ .withPartitionPath(String.format("date=%s/id=10",
datePartition2))
+ .withFileSizeInBytes(0)
+ .withRecordCount(100)
+ .build();
+
+ assertThatThrownBy(
+ () ->
+ commit(
+ table,
+ table
+ .newOverwrite()
+ .overwriteByRowFilter(equal("date",
overwriteDate))
+ .addFile(fileOldSpec)
+ .addFile(fileNewSpec)
+ .validateAddedFilesMatchOverwriteFilter(),
+ branch))
+ .isInstanceOf(ValidationException.class)
+ .hasMessageStartingWith("Cannot append file with rows that
do not match filter");
+
+ assertThat(latestSnapshot(table, branch).snapshotId())
+ .as("Should not create a new snapshot")
+ .isEqualTo(baseId);
+ });
+ }
+
+ @Test
+ public void
testShouldPassValidationIfAllFilesAreInPartitionMatchingOverwriteByRowFilter() {
+ // ensure the overwrite results in a merge
+ table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT,
"1").commit();
+
+ table.updateSpec().addField("id").commit();
+ PartitionSpec partitionByDateAndId =
+
PartitionSpec.builderFor(DATE_SCHEMA).withSpecId(1).identity("date").identity("id").build();
+ assertThat(table.spec())
+ .as("New spec should be partitioned by date and id.")
+ .isEqualTo(partitionByDateAndId);
+
+ TableMetadata base = TestTables.readMetadata(TABLE_NAME);
+ long baseId =
+ latestSnapshot(base, branch) == null ? -1 : latestSnapshot(base,
branch).snapshotId();
+
+ DataFile fileOldSpec =
+ DataFiles.builder(PARTITION_BY_DATE)
+ .withPath("/path/to/data-1.parquet")
+ .withPartitionPath("date=2018-06-09")
+ .withFileSizeInBytes(0)
+ .withRecordCount(100)
+ .build();
+
+ DataFile fileNewSpec =
+ DataFiles.builder(partitionByDateAndId)
+ .withPath("/path/to/data-2.parquet")
+ .withPartitionPath("date=2018-06-09/id=10")
+ .withFileSizeInBytes(0)
+ .withRecordCount(100)
+ .build();
+
+ commit(
+ table,
+ table
+ .newOverwrite()
+ .overwriteByRowFilter(equal("date", "2018-06-09"))
+ .addFile(fileOldSpec) // in 2018-06-09
+ .addFile(fileNewSpec) // in 2018-06-09
+ .validateAddedFilesMatchOverwriteFilter(),
+ branch);
+
+ long overwriteId = latestSnapshot(table, branch).snapshotId();
+
+ assertThat(overwriteId).as("Should create a new
snapshot").isNotEqualTo(baseId);
+
+ List<ManifestFile> allManifests = latestSnapshot(table,
branch).allManifests(table.io());
+ assertThat(allManifests.size())
Review Comment:
```suggestion
assertThat(allManifests).as(..).hasSize(2)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]