koenvo commented on code in PR #1878: URL: https://github.com/apache/iceberg-python/pull/1878#discussion_r2051739903
########## tests/table/test_upsert.py: ########## @@ -511,6 +511,76 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None: tbl.upsert(df) +def test_upsert_struct_field_fails_in_join(catalog: Catalog) -> None: + identifier = "default.test_upsert_struct_field_fails" + _drop_table(catalog, identifier) + + schema = Schema( + NestedField(1, "id", IntegerType(), required=True), + NestedField( + 2, + "nested_type", + # Struct<type: string, coordinates: list<double>> + StructType( + NestedField(3, "sub1", StringType(), required=True), + NestedField(4, "sub2", StringType(), required=True), + ), + required=False, + ), + identifier_field_ids=[1], + ) + + tbl = catalog.create_table(identifier, schema=schema) + + arrow_schema = pa.schema( + [ + pa.field("id", pa.int32(), nullable=False), + pa.field( + "nested_type", + pa.struct( + [ + pa.field("sub1", pa.large_string(), nullable=False), + pa.field("sub2", pa.large_string(), nullable=False), + ] + ), + nullable=True, + ), + ] + ) + + initial_data = pa.Table.from_pylist( + [ + { + "id": 1, + "nested_type": {"sub1": "bla1", "sub2": "bla"}, + } + ], + schema=arrow_schema, + ) + tbl.append(initial_data) + + update_data = pa.Table.from_pylist( + [ + { + "id": 2, + "nested_type": {"sub1": "bla1", "sub2": "bla"}, + }, + { + "id": 1, + "nested_type": {"sub1": "bla1", "sub2": "bla"}, + }, + ], + schema=arrow_schema, + ) + + upd = tbl.upsert(update_data, join_cols=["id"]) + + # Row needs to be updated even tho it's not changed. + # When pyarrow isn't able to compare rows, just update everything + assert upd.rows_updated == 1 Review Comment: Do you think we should warn when the fallback is used, or when unchanged rows are updated? The latter case is not detectable, which is the very reason for this PR. Either way, I would suggest making this configurable. In my own case, I'm aware my schema includes complex types and I don't need the warnings. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org