zeroshade commented on code in PR #1126:
URL: https://github.com/apache/iceberg-go/pull/1126#discussion_r3523596834
##########
table/metadata_schema_compatibility_test.go:
##########
@@ -50,3 +50,31 @@ func TestNewMetadata_DuplicateValues(t *testing.T) {
assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
assert.Contains(t, err.Error(), "multiple fields for name foo")
}
+
+func TestNewMetadataRejectsReservedMetadataColumnIDsBeforeReassignment(t
*testing.T) {
+ for _, field := range []iceberg.NestedField{
Review Comment:
This test only covers the two top-level row-lineage fields. Since the new
validator recurses into structs, lists, and maps, please add table-driven cases
for a reserved ID on a struct child, list element, map key, and map value. At
least one case should use a non-canonical field name so the assertion proves
the error reports the actual schema path, not just the metadata column name.
##########
table/metadata_schema_compatibility.go:
##########
@@ -150,6 +149,54 @@ func checkSchemaCompatibility(sc *iceberg.Schema,
formatVersion int) error {
return nil
}
+func validateNoReservedMetadataColumnIDs(sc *iceberg.Schema) error {
+ for _, field := range sc.Fields() {
+ if err := validateNoReservedMetadataColumnID(field,
field.Name); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func validateNoReservedMetadataColumnID(field iceberg.NestedField, name
string) error {
+ if iceberg.IsMetadataColumn(field.ID) {
Review Comment:
This only rejects IDs recognized by `iceberg.IsMetadataColumn`, which
currently covers the row-lineage columns but not the rest of Iceberg's reserved
metadata IDs. User schemas can still use `_file` (2147483646), `_pos`
(2147483645), `_deleted` (2147483644), `_spec_id`, `_partition`, `file_path`,
and other IDs in the reserved range before reassignment. Per the spec, table
schema field IDs must not exceed 2147483447 (`Integer.MAX_VALUE - 200`). Please
reject `field.ID > 2147483447` here, or expand the predicate to cover the full
spec-reserved set, and add a regression test for at least one non-row-lineage
reserved ID such as 2147483646.
##########
table/scanner_internal_test.go:
##########
@@ -492,45 +492,32 @@ func TestProjectionWithRowLineageRequiresV3(t *testing.T)
{
assert.ErrorContains(t, err, "row lineage")
}
-// TestProjectionV3SchemaAlreadyHasRowID covers the case where the user schema
-// already declares _row_id (a reserved field id, but legal in v3). The
-// projection helper must be idempotent and not panic on the duplicate ID.
-func TestProjectionV3SchemaAlreadyHasRowID(t *testing.T) {
+// TestAppendMissingLineageFieldsAlreadyHasRowID covers the helper's idempotent
+// behavior when a schema already includes reserved lineage fields. New table
+// metadata rejects user schemas with these IDs, but projections can still
+// contain them after the scanner adds row-lineage columns.
+func TestAppendMissingLineageFieldsAlreadyHasRowID(t *testing.T) {
Review Comment:
Keeping a direct helper test is useful, but this removes the end-to-end
`Scan.Projection()` regression coverage for metadata that already contains
row-lineage fields. Please add back an end-to-end projection test over v3
metadata containing row-lineage fields so the scanner path remains covered, in
addition to this helper-level idempotence check.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]