Fokko commented on code in PR #469:
URL: https://github.com/apache/iceberg-python/pull/469#discussion_r1502723574


##########
pyiceberg/table/metadata.py:
##########
@@ -178,6 +178,12 @@ class TableMetadataCommonFields(IcebergBaseModel):
     to be used for arbitrary metadata. For example, commit.retry.num-retries
     is used to control the number of commit retries."""
 
+    @field_validator("properties", mode='before')
+    @classmethod
+    def transform_dict_value_to_str(cls, dict: Dict[str, Any]) -> Dict[str, 
str]:
+        assert None not in dict.values(), "None type is not a supported value 
in properties"

Review Comment:
   We try to avoid asserts outside of `tests/`. Could you raise a `ValueError` 
instead?



##########
tests/catalog/test_sql.py:
##########
@@ -851,3 +852,39 @@ def test_concurrent_commit_table(catalog: SqlCatalog, 
table_schema_simple: Schem
         # This one should fail since it already has been updated
         with table_b.update_schema() as update:
             update.add_column(path="c", field_type=IntegerType())
+
+
+@pytest.mark.parametrize(
+    'catalog',
+    [
+        lazy_fixture('catalog_memory'),
+        lazy_fixture('catalog_sqlite'),
+        lazy_fixture('catalog_sqlite_without_rowcount'),
+    ],
+)
+def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: 
Schema, random_identifier: Identifier) -> None:
+    # table properties can be set to int, but still serialized to string
+    database_name, _table_name = random_identifier
+    catalog.create_namespace(database_name)
+    property_with_int = {"property_name": 42}
+    table = catalog.create_table(random_identifier, table_schema_simple, 
properties=property_with_int)
+    assert isinstance(table.properties["property_name"], str)
+
+
+@pytest.mark.parametrize(
+    'catalog',
+    [
+        lazy_fixture('catalog_memory'),
+        lazy_fixture('catalog_sqlite'),
+        lazy_fixture('catalog_sqlite_without_rowcount'),
+    ],
+)
+def test_table_properties_raise_for_none_value(
+    catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: 
Identifier
+) -> None:
+    database_name, _table_name = random_identifier
+    catalog.create_namespace(database_name)
+    property_with_none = {"property_name": None}
+    with pytest.raises(ValidationError) as exc_info:

Review Comment:
   I would expect an exception being thrown by the field validator, instead of 
Pydantic itself.



##########
tests/integration/test_writes.py:
##########
@@ -676,3 +676,37 @@ def test_write_and_evolve(session_catalog: Catalog, 
format_version: int) -> None
         with txn.update_snapshot().fast_append() as snapshot_update:
             for data_file in _dataframe_to_data_files(table=tbl, 
df=pa_table_with_column, file_schema=txn.schema()):
                 snapshot_update.append_data_file(data_file)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_table_properties_int_value(
+    session_catalog: Catalog,
+    arrow_table_with_null: pa.Table,
+    format_version: str,
+) -> None:
+    # table properties can be set to int, but still serialized to string
+    property_with_int = {"property_name": 42}
+    identifier = "default.test_table_properties_int_value"
+
+    tbl = _create_table(
+        session_catalog, identifier, {"format-version": format_version, 
**property_with_int}, [arrow_table_with_null]
+    )
+    assert isinstance(tbl.properties["property_name"], str)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_table_properties_raise_for_none_value(
+    session_catalog: Catalog,
+    arrow_table_with_null: pa.Table,
+    format_version: str,

Review Comment:
   ```suggestion
       format_version: int,
   ```



##########
tests/integration/test_writes.py:
##########
@@ -676,3 +676,37 @@ def test_write_and_evolve(session_catalog: Catalog, 
format_version: int) -> None
         with txn.update_snapshot().fast_append() as snapshot_update:
             for data_file in _dataframe_to_data_files(table=tbl, 
df=pa_table_with_column, file_schema=txn.schema()):
                 snapshot_update.append_data_file(data_file)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_table_properties_int_value(
+    session_catalog: Catalog,
+    arrow_table_with_null: pa.Table,
+    format_version: str,
+) -> None:
+    # table properties can be set to int, but still serialized to string
+    property_with_int = {"property_name": 42}
+    identifier = "default.test_table_properties_int_value"
+
+    tbl = _create_table(
+        session_catalog, identifier, {"format-version": format_version, 
**property_with_int}, [arrow_table_with_null]
+    )
+    assert isinstance(tbl.properties["property_name"], str)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_table_properties_raise_for_none_value(
+    session_catalog: Catalog,
+    arrow_table_with_null: pa.Table,
+    format_version: str,
+) -> None:
+    property_with_none = {"property_name": None}
+    identifier = "default.test_table_properties_raise_for_none_value"
+
+    with pytest.raises(ServerError) as exc_info:
+        _ = _create_table(
+            session_catalog, identifier, {"format-version": format_version, 
**property_with_none}, [arrow_table_with_null]
+        )
+    assert "NullPointerException: null value in entry: property_name=null" in 
str(exc_info.value)

Review Comment:
   I don't think we accept null values. Probably we want to catch this in 
PyIceberg before doing the request. Other backends might have different 
behavior so we want to make sure that we follow the correct behavior in the 
client itself.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to