Fokko commented on code in PR #829: URL: https://github.com/apache/iceberg-python/pull/829#discussion_r1669089423
########## pyiceberg/table/__init__.py: ########## @@ -158,9 +158,11 @@ def _check_schema_compatible(table_schema: Schema, other_schema: "pa.Schema") -> None: """ - Check if the `table_schema` is compatible with `other_schema`. + Check if the `table_schema` is compatible with `other_schema` in terms of the Iceberg Schema representation. - Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type. + The schemas are compatible if: + - All fields in `other_schema` are present in `table_schema`. (other_schema <= table_schema) + - All required fields in `table_schema` are present in `other_schema`. Review Comment: Just a heads up, with V3 this changes since it is allowed to add required fields with a default value. ########## pyiceberg/table/__init__.py: ########## @@ -169,15 +171,18 @@ def _check_schema_compatible(table_schema: Schema, other_schema: "pa.Schema") -> name_mapping = table_schema.name_mapping try: - task_schema = pyarrow_to_schema(other_schema, name_mapping=name_mapping) + other_schema = pyarrow_to_schema(other_schema, name_mapping=name_mapping) except ValueError as e: other_schema = _pyarrow_to_schema_without_ids(other_schema) additional_names = set(other_schema.column_names) - set(table_schema.column_names) raise ValueError( f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)." ) from e - if table_schema.as_struct() != task_schema.as_struct(): + fields_missing_from_table = {field for field in other_schema.fields if field not in table_schema.fields} Review Comment: Can you check if it handles nested structs as well? ########## pyiceberg/table/__init__.py: ########## @@ -158,9 +158,11 @@ def _check_schema_compatible(table_schema: Schema, other_schema: "pa.Schema") -> None: """ - Check if the `table_schema` is compatible with `other_schema`. + Check if the `table_schema` is compatible with `other_schema` in terms of the Iceberg Schema representation. - Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type. + The schemas are compatible if: + - All fields in `other_schema` are present in `table_schema`. (other_schema <= table_schema) Review Comment: ```suggestion - All fields in `other_schema` are present in `table_schema`. (`other_schema ⊆ table_schema`) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org