stevenzwu commented on code in PR #10200: URL: https://github.com/apache/iceberg/pull/10200#discussion_r1575094495
########## flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/source/RowDataRewriter.java: ########## @@ -84,7 +85,8 @@ public RowDataRewriter( format, table.properties(), null, - false); + false, + DeleteGranularity.FILE); Review Comment: Is the default change intentional? ########## data/src/test/java/org/apache/iceberg/io/TestTaskEqualityDeltaWriter.java: ########## @@ -409,6 +421,55 @@ public void testUpsertDataWithFullRowSchema() throws IOException { .isEqualTo(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L))); } + @TestTemplate + public void testDeleteFileGranularity() throws IOException { + withGranularity(DeleteGranularity.FILE); + } + + @TestTemplate + public void testDeletePartitionGranularity() throws IOException { + withGranularity(DeleteGranularity.PARTITION); + } + + private void withGranularity(DeleteGranularity granularity) throws IOException { + List<Integer> eqDeleteFieldIds = Lists.newArrayList(idFieldId, dataFieldId); + Schema eqDeleteRowSchema = table.schema(); + + GenericTaskDeltaWriter deltaWriter = + createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema, granularity); + + Map<Integer, Record> expected = Maps.newHashMapWithExpectedSize(2000); + // Create enough records, so we have multiple files + for (int i = 0; i < 2000; ++i) { + Record record = createRecord(i, "aaa" + i); + deltaWriter.write(record); + if (i % 5 == 10) { + deltaWriter.delete(record); + } else { + expected.put(i, record); + } + } + + // Add some deletes in the end + for (int i = 0; i < 199; ++i) { + int id = i * 10 + 1; + Record record = createRecord(id, "aaa" + id); + deltaWriter.delete(record); + expected.remove(id); + } + + WriteResult result = deltaWriter.complete(); + assertThat(result.dataFiles()).as("Should have 2 data files.").hasSize(2); Review Comment: The target file size is 128 bytes, so I am wondering why only 2 data files are created. 
########## data/src/test/java/org/apache/iceberg/io/TestTaskEqualityDeltaWriter.java: ########## @@ -409,6 +421,55 @@ public void testUpsertDataWithFullRowSchema() throws IOException { .isEqualTo(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L))); } + @TestTemplate + public void testDeleteFileGranularity() throws IOException { + withGranularity(DeleteGranularity.FILE); + } + + @TestTemplate + public void testDeletePartitionGranularity() throws IOException { + withGranularity(DeleteGranularity.PARTITION); + } + + private void withGranularity(DeleteGranularity granularity) throws IOException { + List<Integer> eqDeleteFieldIds = Lists.newArrayList(idFieldId, dataFieldId); + Schema eqDeleteRowSchema = table.schema(); + + GenericTaskDeltaWriter deltaWriter = + createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema, granularity); + + Map<Integer, Record> expected = Maps.newHashMapWithExpectedSize(2000); + // Create enough records, so we have multiple files + for (int i = 0; i < 2000; ++i) { + Record record = createRecord(i, "aaa" + i); + deltaWriter.write(record); + if (i % 5 == 10) { + deltaWriter.delete(record); + } else { + expected.put(i, record); + } + } + + // Add some deletes in the end + for (int i = 0; i < 199; ++i) { + int id = i * 10 + 1; + Record record = createRecord(id, "aaa" + id); + deltaWriter.delete(record); + expected.remove(id); + } + + WriteResult result = deltaWriter.complete(); + assertThat(result.dataFiles()).as("Should have 2 data files.").hasSize(2); + assertThat(result.deleteFiles()) Review Comment: Does `deleteFiles()` return only position deletes? What about equality deletes? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org