kevinjqliu commented on code in PR #1861: URL: https://github.com/apache/iceberg-python/pull/1861#discussion_r2021305248
########## tests/table/test_upsert.py: ########## @@ -509,3 +509,39 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None: ValueError, match="Join columns could not be found, please set identifier-field-ids or pass in explicitly." ): tbl.upsert(df) + + +def test_upsert_with_nulls(catalog: Catalog) -> None: + identifier = "default.test_upsert_with_nulls" + _drop_table(catalog, identifier) + + schema = pa.schema( + [ + ("foo", pa.string()), + ("bar", pa.int32()), + ("baz", pa.bool_()), + ] + ) + + # create table with null value + table = catalog.create_table(identifier, schema) + data_with_null = pa.Table.from_pylist( + [ + {"foo": "apple", "bar": None, "baz": False}, Review Comment: ``` # create table with null value table = catalog.create_table(identifier, schema) data_with_null = pa.Table.from_pylist( [ {"foo": "apple", "bar": None, "baz": False}, {"foo": "banana", "bar": None, "baz": False}, ], schema=schema, ) table.append(data_with_null) assert table.scan().to_arrow()["bar"].is_null() # upsert table with non-null value data_without_null = pa.Table.from_pylist( [ {"foo": "apple", "bar": 7, "baz": False}, ], schema=schema, ) upd = table.upsert(data_without_null, join_cols=["foo"]) assert upd.rows_updated == 1 assert upd.rows_inserted == 0 assert table.scan().to_arrow() == pa.Table.from_pylist( [ {"foo": "apple", "bar": 7, "baz": False}, {"foo": "banana", "bar": None, "baz": False}, ], schema=schema, ) ``` ########## tests/table/test_upsert.py: ########## @@ -509,3 +509,39 @@ def test_upsert_without_identifier_fields(catalog: Catalog) -> None: ValueError, match="Join columns could not be found, please set identifier-field-ids or pass in explicitly." ): tbl.upsert(df) + + +def test_upsert_with_nulls(catalog: Catalog) -> None: + identifier = "default.test_upsert_with_nulls" + _drop_table(catalog, identifier) + + schema = pa.schema( + [ + ("foo", pa.string()), + ("bar", pa.int32()), + ("baz", pa.bool_()), + ] + ) + + # create table with null value + table = catalog.create_table(identifier, schema) + data_with_null = pa.Table.from_pylist( + [ + {"foo": "apple", "bar": None, "baz": False}, Review Comment: this runs successfully, ``` # create table with null value table = catalog.create_table(identifier, schema) data_with_null = pa.Table.from_pylist( [ {"foo": "apple", "bar": None, "baz": False}, {"foo": "banana", "bar": None, "baz": False}, ], schema=schema, ) table.append(data_with_null) assert table.scan().to_arrow()["bar"].is_null() # upsert table with non-null value data_without_null = pa.Table.from_pylist( [ {"foo": "apple", "bar": 7, "baz": False}, ], schema=schema, ) upd = table.upsert(data_without_null, join_cols=["foo"]) assert upd.rows_updated == 1 assert upd.rows_inserted == 0 assert table.scan().to_arrow() == pa.Table.from_pylist( [ {"foo": "apple", "bar": 7, "baz": False}, {"foo": "banana", "bar": None, "baz": False}, ], schema=schema, ) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org