Fokko commented on code in PR #921: URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1678311441
########## pyiceberg/io/pyarrow.py: ########## @@ -2070,47 +2072,30 @@ def bin_pack_arrow_table(tbl: pa.Table, target_file_size: int) -> Iterator[List[ return bin_packed_record_batches -def _check_schema_compatible(table_schema: Schema, other_schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False) -> None: +def _check_pyarrow_schema_compatible( + requested_schema: Schema, provided_schema: pa.Schema, downcast_ns_timestamp_to_us: bool = False +) -> None: """ - Check if the `table_schema` is compatible with `other_schema`. + Check if the `requested_schema` is compatible with `provided_schema`. Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type. Raises: ValueError: If the schemas are not compatible. """ - name_mapping = table_schema.name_mapping + name_mapping = requested_schema.name_mapping try: - task_schema = pyarrow_to_schema( - other_schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us + provided_schema = pyarrow_to_schema( + provided_schema, name_mapping=name_mapping, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us ) except ValueError as e: - other_schema = _pyarrow_to_schema_without_ids(other_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us) - additional_names = set(other_schema.column_names) - set(table_schema.column_names) + provided_schema = _pyarrow_to_schema_without_ids(provided_schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us) + additional_names = set(provided_schema._name_to_id.keys()) - set(requested_schema._name_to_id.keys()) raise ValueError( Review Comment: I think `ValueError` is the best here: _Raised when an operation or function receives an argument that has the right type but an inappropriate value_. In this case, we're passing in a `pa.Table`, but it has too many columns. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org