Fokko commented on code in PR #921:
URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1679424345
##########
tests/integration/test_add_files.py:
##########
@@ -617,3 +607,92 @@ def
test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v
),
):
tbl.add_files(file_paths=[file_path])
+
+
[email protected]
[email protected]("format_version", [1, 2])
+def test_table_write_schema_with_valid_nullability_diff(
+ spark: SparkSession, session_catalog: Catalog, format_version: int
+) -> None:
+ identifier =
f"default.test_table_write_with_valid_nullability_diff{format_version}"
+ table_schema = Schema(
+ NestedField(field_id=1, name="long", field_type=LongType(),
required=False),
+ )
+ other_schema = pa.schema((
+ pa.field("long", pa.int64(), nullable=False), # can support writing
required pyarrow field to optional Iceberg field
+ ))
+ arrow_table = pa.Table.from_pydict(
+ {
+ "long": [1, 9],
+ },
+ schema=other_schema,
+ )
+ tbl = _create_table(session_catalog, identifier, format_version,
schema=table_schema)
+
+ file_path =
f"s3://warehouse/default/test_valid_nullability_diff/v{format_version}/test.parquet"
+ # write parquet files
+ fo = tbl.io.new_output(file_path)
+ with fo.create(overwrite=True) as fos:
+ with pq.ParquetWriter(fos, schema=other_schema) as writer:
+ writer.write_table(arrow_table)
+
+ tbl.add_files(file_paths=[file_path])
+ # table's long field should cast to be optional on read
+ written_arrow_table = tbl.scan().to_arrow()
+ assert written_arrow_table == arrow_table.cast(pa.schema((pa.field("long",
pa.int64(), nullable=True),)))
+ lhs = spark.table(f"{identifier}").toPandas()
+ rhs = written_arrow_table.to_pandas()
+
+ for column in written_arrow_table.column_names:
+ for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
+ assert left == right
+
+
[email protected]
[email protected]("format_version", [1, 2])
+def test_table_write_schema_with_valid_upcast(
+ spark: SparkSession,
+ session_catalog: Catalog,
+ format_version: int,
+ table_schema_with_promoted_types: Schema,
+ pyarrow_schema_with_promoted_types: pa.Schema,
+ pyarrow_table_with_promoted_types: pa.Table,
+) -> None:
+ identifier = f"default.test_table_write_with_valid_upcast{format_version}"
+ tbl = _create_table(session_catalog, identifier, format_version,
schema=table_schema_with_promoted_types)
+
+ file_path =
f"s3://warehouse/default/test_valid_nullability_diff/v{format_version}/test.parquet"
Review Comment:
```suggestion
file_path =
f"s3://warehouse/default/test_table_write_schema_with_valid_upcast/v{format_version}/test.parquet"
```
##########
tests/integration/test_add_files.py:
##########
@@ -617,3 +607,92 @@ def
test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v
),
):
tbl.add_files(file_paths=[file_path])
+
+
[email protected]
[email protected]("format_version", [1, 2])
+def test_table_write_schema_with_valid_nullability_diff(
+ spark: SparkSession, session_catalog: Catalog, format_version: int
+) -> None:
+ identifier =
f"default.test_table_write_with_valid_nullability_diff{format_version}"
+ table_schema = Schema(
+ NestedField(field_id=1, name="long", field_type=LongType(),
required=False),
+ )
+ other_schema = pa.schema((
+ pa.field("long", pa.int64(), nullable=False), # can support writing
required pyarrow field to optional Iceberg field
+ ))
+ arrow_table = pa.Table.from_pydict(
+ {
+ "long": [1, 9],
+ },
+ schema=other_schema,
+ )
+ tbl = _create_table(session_catalog, identifier, format_version,
schema=table_schema)
+
+ file_path =
f"s3://warehouse/default/test_valid_nullability_diff/v{format_version}/test.parquet"
Review Comment:
```suggestion
file_path =
f"s3://warehouse/default/test_table_write_schema_with_valid_nullability_diff/v{format_version}/test.parquet"
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]