amogh-jahagirdar commented on code in PR #11948: URL: https://github.com/apache/iceberg/pull/11948#discussion_r1919142416
########## core/src/main/java/org/apache/iceberg/TableMetadata.java: ########## @@ -615,10 +638,15 @@ public TableMetadata replaceProperties(Map<String, String> rawProperties) { int newFormatVersion = PropertyUtil.propertyAsInt(rawProperties, TableProperties.FORMAT_VERSION, formatVersion); + Boolean newRowLineage = Review Comment: same as above, I think this could be primitive? ########## core/src/main/java/org/apache/iceberg/TableMetadata.java: ########## @@ -262,6 +265,8 @@ public String toString() { private volatile Map<Long, Snapshot> snapshotsById; private volatile Map<String, SnapshotRef> refs; private volatile boolean snapshotsLoaded; + private final Boolean rowLineageEnabled; Review Comment: Can this be made a primitive boolean? ########## core/src/main/java/org/apache/iceberg/TableMetadata.java: ########## @@ -1230,6 +1265,18 @@ public Builder addSnapshot(Snapshot snapshot) { snapshotsById.put(snapshot.snapshotId(), snapshot); changes.add(new MetadataUpdate.AddSnapshot(snapshot)); + if (rowLineage) { + ValidationException.check( + snapshot.firstRowId() >= lastRowId, + "Cannot add a snapshot whose first-row-id (%s) is less than the metadata `last-used-id` (%s) because this will end up generating duplicate row_ids.", Review Comment: Shouldn't this be strictly greater? ########## core/src/test/java/org/apache/iceberg/TestRowLineageMetadata.java: ########## @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; + +import java.io.File; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; + +public class TestRowLineageMetadata { + + @TempDir private File tableDir = null; + + private static final String TEST_LOCATION = "s3://bucket/test/location"; + + private static final Schema TEST_SCHEMA = + new Schema( + 7, + Types.NestedField.required(1, "x", Types.LongType.get()), + Types.NestedField.required(2, "y", Types.LongType.get(), "comment"), + Types.NestedField.required(3, "z", Types.LongType.get())); + + private TableMetadata baseMetadata(int formatVersion) { + return TableMetadata.buildFromEmpty(formatVersion) + .enableRowLineage() + .addSchema(TEST_SCHEMA) + .setLocation(TEST_LOCATION) + .addPartitionSpec(PartitionSpec.unpartitioned()) + .addSortOrder(SortOrder.unsorted()) + .build(); + } + + @AfterEach + public void cleanup() { + TestTables.clearTables(); + } + + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testRowLineageSupported(int formatVersion) { + if (formatVersion == TableMetadata.MIN_FORMAT_VERSION_ROW_LINEAGE) { + assertThat(TableMetadata.buildFromEmpty(formatVersion)).isNotNull(); + } else { + assertThatThrownBy(() -> TableMetadata.buildFromEmpty(formatVersion).enableRowLineage()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot use row lineage"); + } + } + + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testSnapshotAddition(int formatVersion) { + assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_ROW_LINEAGE); + + Long newRows = 30L; Review Comment: Nit: primitive? ########## core/src/main/java/org/apache/iceberg/TableMetadata.java: ########## @@ -1230,6 +1265,18 @@ public Builder addSnapshot(Snapshot snapshot) { snapshotsById.put(snapshot.snapshotId(), snapshot); changes.add(new MetadataUpdate.AddSnapshot(snapshot)); + if (rowLineage) { + ValidationException.check( + snapshot.firstRowId() >= lastRowId, + "Cannot add a snapshot whose first-row-id (%s) is less than the metadata `last-used-id` (%s) because this will end up generating duplicate row_ids.", Review Comment: Ah no, if my first row ID is 100 and I added 40 records (it'd be 100-139 inclusive), the next snapshot should be able to start at 140 (which would be the lastRowId in this case). I think this is good -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org