fusion2222 commented on issue #1045: URL: https://github.com/apache/iceberg-python/issues/1045#issuecomment-2283725721
I also have a patch for this ready; however, it seems I have no permission to push a new branch and create a PR:

```patch
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index b99c3b1..153b8a5 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -2303,6 +2303,8 @@ def _check_pyarrow_schema_compatible(
 
 
 def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_paths: Iterator[str]) -> Iterator[DataFile]:
+    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE
+
     for file_path in file_paths:
         input_file = io.new_input(file_path)
         with input_file.open() as input_stream:
@@ -2313,7 +2315,12 @@ def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_
                 f"Cannot add file {file_path} because it has field IDs. `add_files` only supports addition of files without field_ids"
             )
         schema = table_metadata.schema()
-        _check_pyarrow_schema_compatible(schema, parquet_metadata.schema.to_arrow_schema())
+        downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
+        _check_pyarrow_schema_compatible(
+            schema,
+            parquet_metadata.schema.to_arrow_schema(),
+            downcast_ns_timestamp_to_us
+        )
 
         statistics = data_file_statistics_from_parquet_metadata(
             parquet_metadata=parquet_metadata,
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org