kevinjqliu commented on code in PR #590: URL: https://github.com/apache/iceberg-python/pull/590#discussion_r1563557100
########## tests/integration/test_writes/test_writes.py: ########## @@ -270,6 +270,48 @@ def get_current_snapshot_id(identifier: str) -> int: assert tbl.current_snapshot().snapshot_id == get_current_snapshot_id(identifier) # type: ignore +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_python_writes_special_character_column_with_spark_reads( + spark: SparkSession, session_catalog: Catalog, format_version: int +) -> None: + identifier = "default.python_writes_special_character_column_with_spark_reads" + column_name_with_special_character = "letter/abc" + TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN = { + column_name_with_special_character: ['a', None, 'z'], + 'id': [1, 2, 3], + 'name': ['AB', 'CD', 'EF'], + 'address': [ + {'street': '123', 'city': 'SFO', 'zip': 12345, column_name_with_special_character: 'a'}, + {'street': '456', 'city': 'SW', 'zip': 67890, column_name_with_special_character: 'b'}, + {'street': '789', 'city': 'Random', 'zip': 10112, column_name_with_special_character: 'c'}, + ], + } + pa_schema = pa.schema([ + pa.field(column_name_with_special_character, pa.string()), + pa.field('id', pa.int32()), + pa.field('name', pa.string()), + pa.field( + 'address', + pa.struct([ + pa.field('street', pa.string()), + pa.field('city', pa.string()), + pa.field('zip', pa.int32()), + pa.field(column_name_with_special_character, pa.string()), + ]), + ), + ]) + arrow_table_with_special_character_column = pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema) + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) + + tbl.overwrite(arrow_table_with_special_character_column) + # PySpark toPandas() turns nested field into tuple by default, but returns the proper schema when Arrow is enabled + spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true") Review Comment: good catch! i didn't know about the fixture scope behavior. 
Moved this Spark config setting to conftest. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org