koenvo commented on code in PR #1878:
URL: https://github.com/apache/iceberg-python/pull/1878#discussion_r2051739903


##########
tests/table/test_upsert.py:
##########
@@ -511,6 +511,76 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None:
         tbl.upsert(df)
 
 
+def test_upsert_struct_field_fails_in_join(catalog: Catalog) -> None:
+    identifier = "default.test_upsert_struct_field_fails"
+    _drop_table(catalog, identifier)
+
+    schema = Schema(
+        NestedField(1, "id", IntegerType(), required=True),
+        NestedField(
+            2,
+            "nested_type",
+            # Struct<type: string, coordinates: list<double>>
+            StructType(
+                NestedField(3, "sub1", StringType(), required=True),
+                NestedField(4, "sub2", StringType(), required=True),
+            ),
+            required=False,
+        ),
+        identifier_field_ids=[1],
+    )
+
+    tbl = catalog.create_table(identifier, schema=schema)
+
+    arrow_schema = pa.schema(
+        [
+            pa.field("id", pa.int32(), nullable=False),
+            pa.field(
+                "nested_type",
+                pa.struct(
+                    [
+                        pa.field("sub1", pa.large_string(), nullable=False),
+                        pa.field("sub2", pa.large_string(), nullable=False),
+                    ]
+                ),
+                nullable=True,
+            ),
+        ]
+    )
+
+    initial_data = pa.Table.from_pylist(
+        [
+            {
+                "id": 1,
+                "nested_type": {"sub1": "bla1", "sub2": "bla"},
+            }
+        ],
+        schema=arrow_schema,
+    )
+    tbl.append(initial_data)
+
+    update_data = pa.Table.from_pylist(
+        [
+            {
+                "id": 2,
+                "nested_type": {"sub1": "bla1", "sub2": "bla"},
+            },
+            {
+                "id": 1,
+                "nested_type": {"sub1": "bla1", "sub2": "bla"},
+            },
+        ],
+        schema=arrow_schema,
+    )
+
+    upd = tbl.upsert(update_data, join_cols=["id"])
+
+    # Row needs to be updated even tho it's not changed.
+    # When pyarrow isn't able to compare rows, just update everything
+    assert upd.rows_updated == 1

Review Comment:
   Do you think we should warn when the fallback is used, or when unchanged
rows are updated? The latter case is not detectable, which is the reason for
this PR.
   
   Either way, I would suggest making this configurable. In my own case, I'm
aware that my schema includes complex types, and I don't need the warnings.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to