Fokko commented on code in PR #469: URL: https://github.com/apache/iceberg-python/pull/469#discussion_r1502723574
########## pyiceberg/table/metadata.py: ########## @@ -178,6 +178,12 @@ class TableMetadataCommonFields(IcebergBaseModel): to be used for arbitrary metadata. For example, commit.retry.num-retries is used to control the number of commit retries.""" + @field_validator("properties", mode='before') + @classmethod + def transform_dict_value_to_str(cls, dict: Dict[str, Any]) -> Dict[str, str]: + assert None not in dict.values(), "None type is not a supported value in properties" Review Comment: We try to avoid asserts outside of `tests/`. Could you raise a `ValueError` instead? ########## tests/catalog/test_sql.py: ########## @@ -851,3 +852,39 @@ def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schem # This one should fail since it already has been updated with table_b.update_schema() as update: update.add_column(path="c", field_type=IntegerType()) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + lazy_fixture('catalog_sqlite_without_rowcount'), + ], +) +def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: + # table properties can be set to int, but still serialized to string + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + property_with_int = {"property_name": 42} + table = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_int) + assert isinstance(table.properties["property_name"], str) + + +@pytest.mark.parametrize( + 'catalog', + [ + lazy_fixture('catalog_memory'), + lazy_fixture('catalog_sqlite'), + lazy_fixture('catalog_sqlite_without_rowcount'), + ], +) +def test_table_properties_raise_for_none_value( + catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier +) -> None: + database_name, _table_name = random_identifier + catalog.create_namespace(database_name) + property_with_none = {"property_name": None} + with pytest.raises(ValidationError) as exc_info: Review Comment: I would expect an exception being thrown by the field validator, instead of Pydantic itself. ########## tests/integration/test_writes.py: ########## @@ -676,3 +676,37 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None with txn.update_snapshot().fast_append() as snapshot_update: for data_file in _dataframe_to_data_files(table=tbl, df=pa_table_with_column, file_schema=txn.schema()): snapshot_update.append_data_file(data_file) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_int_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: str, +) -> None: + # table properties can be set to int, but still serialized to string + property_with_int = {"property_name": 42} + identifier = "default.test_table_properties_int_value" + + tbl = _create_table( + session_catalog, identifier, {"format-version": format_version, **property_with_int}, [arrow_table_with_null] + ) + assert isinstance(tbl.properties["property_name"], str) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_raise_for_none_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: str, Review Comment: ```suggestion format_version: int, ``` ########## tests/integration/test_writes.py: ########## @@ -676,3 +676,37 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None with txn.update_snapshot().fast_append() as snapshot_update: for data_file in _dataframe_to_data_files(table=tbl, df=pa_table_with_column, file_schema=txn.schema()): snapshot_update.append_data_file(data_file) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_int_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: str, +) -> None: + # table properties can be set to int, but still serialized to string + property_with_int = {"property_name": 42} + identifier = "default.test_table_properties_int_value" + + tbl = _create_table( + session_catalog, identifier, {"format-version": format_version, **property_with_int}, [arrow_table_with_null] + ) + assert isinstance(tbl.properties["property_name"], str) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_properties_raise_for_none_value( + session_catalog: Catalog, + arrow_table_with_null: pa.Table, + format_version: str, +) -> None: + property_with_none = {"property_name": None} + identifier = "default.test_table_properties_raise_for_none_value" + + with pytest.raises(ServerError) as exc_info: + _ = _create_table( + session_catalog, identifier, {"format-version": format_version, **property_with_none}, [arrow_table_with_null] + ) + assert "NullPointerException: null value in entry: property_name=null" in str(exc_info.value) Review Comment: I don't think we accept null values. Probably we want to catch this in PyIceberg before doing the request. Other backends might have different behavior so we want to make sure that we follow the correct behavior in the client itself. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org