syun64 commented on code in PR #921:
URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1678023448
##########
tests/io/test_pyarrow.py:
##########
@@ -1783,21 +1796,114 @@ def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None:
"""
with pytest.raises(ValueError, match=expected):
- _check_schema_compatible(table_schema_simple, other_schema)
+ _check_pyarrow_schema_compatible(table_schema_simple, other_schema)
+
+
+def test_schema_compatible_missing_nullable_field_nested(table_schema_nested: Schema) -> None:
+ schema = table_schema_nested.as_arrow()
+ schema = schema.remove(6).insert(
+ 6,
+ pa.field(
+ "person",
+ pa.struct([
+ pa.field("age", pa.int32(), nullable=False),
+ ]),
+ nullable=True,
+ ),
+ )
+ try:
+ _check_pyarrow_schema_compatible(table_schema_nested, schema)
+ except Exception:
+ pytest.fail("Unexpected Exception raised when calling `_check_pyarrow_schema_compatible`")
+
+
+def test_schema_mismatch_missing_required_field_nested(table_schema_nested: Schema) -> None:
+ other_schema = table_schema_nested.as_arrow()
+ other_schema = other_schema.remove(6).insert(
+ 6,
+ pa.field(
+ "person",
+ pa.struct([
+ pa.field("name", pa.string(), nullable=True),
+ ]),
+ nullable=True,
+ ),
+ )
+ expected = """Mismatch in fields:
+┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ ┃ Table field ┃ Dataframe field ┃
+┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ ✅ │ 1: foo: optional string │ 1: foo: optional string │
+│ ✅ │ 2: bar: required int │ 2: bar: required int │
+│ ✅ │ 3: baz: optional boolean │ 3: baz: optional boolean │
+│ ✅ │ 4: qux: required list<string> │ 4: qux: required list<string> │
+│ ✅ │ 5: element: required string │ 5: element: required string │
Review Comment:
Yes, I was thinking about this too, and went with this approach for now.
Curious to hear which representation folks would find helpful!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]