rdblue commented on code in PR #12593: URL: https://github.com/apache/iceberg/pull/12593#discussion_r2011041333
########## core/src/test/java/org/apache/iceberg/TestRowLineageMetadata.java: ########## @@ -241,85 +257,110 @@ public void testDeletes() { table.newDelete().deleteFile(file).commit(); // Deleting a file should create a new snapshot which should inherit last-row-id from the - // previous metadata and not - // change last-row-id for this metadata. + // previous metadata and not change last-row-id for this metadata. assertThat(table.currentSnapshot().firstRowId()).isEqualTo(30); assertThat(table.currentSnapshot().addedRows()).isEqualTo(0); assertThat(table.ops().current().nextRowId()).isEqualTo(30); } @TestTemplate - public void testReplace() { - assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_ROW_LINEAGE); + public void testPositionDeletes() { + assumeThat(formatVersion).isGreaterThanOrEqualTo(ROW_LINEAGE_MIN_FORMAT_VERSION); TestTables.TestTable table = TestTables.create( tableDir, "test", TEST_SCHEMA, PartitionSpec.unpartitioned(), formatVersion); - TableMetadata base = table.ops().current(); - table.ops().commit(base, TableMetadata.buildFrom(base).enableRowLineage().build()); - - assertThat(table.ops().current().rowLineageEnabled()).isTrue(); assertThat(table.ops().current().nextRowId()).isEqualTo(0L); - DataFile filePart1 = fileWithRows(30); - DataFile filePart2 = fileWithRows(30); - DataFile fileCompacted = fileWithRows(60); + DataFile file = fileWithRows(30); - table.newAppend().appendFile(filePart1).appendFile(filePart2).commit(); + table.newAppend().appendFile(file).commit(); assertThat(table.currentSnapshot().firstRowId()).isEqualTo(0); - assertThat(table.ops().current().nextRowId()).isEqualTo(60); - - table.newRewrite().deleteFile(filePart1).deleteFile(filePart2).addFile(fileCompacted).commit(); + assertThat(table.ops().current().nextRowId()).isEqualTo(30); - // Rewrites are currently just treated as appends. In the future we could treat these as no-ops - assertThat(table.currentSnapshot().firstRowId()).isEqualTo(60); - assertThat(table.ops().current().nextRowId()).isEqualTo(120); + // v3 only allows Puffin-based DVs for position deletes + DeleteFile deletes = + FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned()) + .ofPositionDeletes() + .withFormat(FileFormat.PUFFIN) + .withFileSizeInBytes(100) + .withRecordCount(10) + .withContentOffset(0) + .withContentSizeInBytes(50) + .withPath("deletes.puffin") + .withReferencedDataFile(file.location()) + .build(); + + table.newRowDelta().addDeletes(deletes).commit(); + + // Delete file records do not count toward added rows + assertThat(table.currentSnapshot().firstRowId()).isEqualTo(30); + assertThat(table.currentSnapshot().addedRows()).isEqualTo(0); + assertThat(table.ops().current().nextRowId()).isEqualTo(30); } @TestTemplate - public void testEnableRowLineageViaProperty() { - assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_ROW_LINEAGE); + public void testEqualityDeletes() { + assumeThat(formatVersion).isGreaterThanOrEqualTo(ROW_LINEAGE_MIN_FORMAT_VERSION); TestTables.TestTable table = TestTables.create( tableDir, "test", TEST_SCHEMA, PartitionSpec.unpartitioned(), formatVersion); - assertThat(table.ops().current().rowLineageEnabled()).isFalse(); + assertThat(table.ops().current().nextRowId()).isEqualTo(0L); - // No-op - table.updateProperties().set(TableProperties.ROW_LINEAGE, "false").commit(); - assertThat(table.ops().current().rowLineageEnabled()).isFalse(); + DataFile file = fileWithRows(30); - // Enable row lineage - table.updateProperties().set(TableProperties.ROW_LINEAGE, "true").commit(); - assertThat(table.ops().current().rowLineageEnabled()).isTrue(); + table.newAppend().appendFile(file).commit(); - // Disabling row lineage is not allowed - assertThatThrownBy( - () -> table.updateProperties().set(TableProperties.ROW_LINEAGE, "false").commit()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Cannot disable row lineage once it has been enabled"); + assertThat(table.currentSnapshot().firstRowId()).isEqualTo(0); + assertThat(table.ops().current().nextRowId()).isEqualTo(30); + + DeleteFile deletes = + FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned()) + .ofEqualityDeletes(table.schema().findField("x").fieldId()) + .withFormat(FileFormat.PARQUET) + .withFileSizeInBytes(100) + .withRecordCount(10) + .withPath("deletes.parquet") + .withReferencedDataFile(file.location()) + .build(); + + table.newRowDelta().addDeletes(deletes).commit(); - // No-op - table.updateProperties().set(TableProperties.ROW_LINEAGE, "true").commit(); - assertThat(table.ops().current().rowLineageEnabled()).isTrue(); + // Delete file records do not count toward added rows + assertThat(table.currentSnapshot().firstRowId()).isEqualTo(30); + assertThat(table.currentSnapshot().addedRows()).isEqualTo(0); + assertThat(table.ops().current().nextRowId()).isEqualTo(30); } @TestTemplate - public void testEnableRowLineageViaPropertyAtTableCreation() { - assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_ROW_LINEAGE); + public void testReplace() { Review Comment: Updated this test because `DataOperation.REPLACE` row counts are now handled correctly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org