Fokko commented on code in PR #921: URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1677453778
########## tests/io/test_pyarrow.py: ########## @@ -1783,21 +1796,114 @@ def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None: """ with pytest.raises(ValueError, match=expected): - _check_schema_compatible(table_schema_simple, other_schema) + _check_pyarrow_schema_compatible(table_schema_simple, other_schema) + + +def test_schema_compatible_missing_nullable_field_nested(table_schema_nested: Schema) -> None: + schema = table_schema_nested.as_arrow() + schema = schema.remove(6).insert( + 6, + pa.field( + "person", + pa.struct([ + pa.field("age", pa.int32(), nullable=False), + ]), + nullable=True, + ), + ) + try: + _check_pyarrow_schema_compatible(table_schema_nested, schema) + except Exception: + pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`") + + +def test_schema_mismatch_missing_required_field_nested(table_schema_nested: Schema) -> None: + other_schema = table_schema_nested.as_arrow() + other_schema = other_schema.remove(6).insert( + 6, + pa.field( + "person", + pa.struct([ + pa.field("name", pa.string(), nullable=True), + ]), + nullable=True, + ), + ) + expected = """Mismatch in fields: +┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ ┃ Table field ┃ Dataframe field ┃ +┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ ✅ │ 1: foo: optional string │ 1: foo: optional string │ +│ ✅ │ 2: bar: required int │ 2: bar: required int │ +│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │ +│ ✅ │ 4: qux: required list<string> │ 4: qux: required list<string> │ +│ ✅ │ 5: element: required string │ 5: element: required string │ Review Comment: I see that we also list all the nested elements, which makes sense. We could also decide on only showing the parent types, but I think this is more comprehensive. Would love to know what others think. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org