Fokko commented on code in PR #921: URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1677184500
########## pyiceberg/schema.py: ########## @@ -1616,3 +1621,145 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType: return read_type else: raise ResolveError(f"Cannot promote {file_type} to {read_type}") + + +def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None: + """ + Check if the `provided_schema` is compatible with `requested_schema`. + + Both Schemas must have valid IDs and share the same ID for the same field names. + + Two schemas are considered compatible when: + 1. All `required` fields in `requested_schema` are present and are also `required` in the `provided_schema` + 2. Field Types are consistent for fields that are present in both schemas. I.e. the field type + in the `provided_schema` can be promoted to the field type of the same field ID in `requested_schema` + + Raises: + ValueError: If the schemas are not compatible. + """ + visit(requested_schema, _SchemaCompatibilityVisitor(provided_schema)) + + # from rich.console import Console + # from rich.table import Table as RichTable + + # console = Console(record=True) + + # rich_table = RichTable(show_header=True, header_style="bold") + # rich_table.add_column("") + # rich_table.add_column("Table field") + # rich_table.add_column("Dataframe field") + + # is_compatible = True + + # for field_id in requested_schema.field_ids: + # lhs = requested_schema.find_field(field_id) + # try: + # rhs = provided_schema.find_field(field_id) + # except ValueError: + # if lhs.required: + # rich_table.add_row("❌", str(lhs), "Missing") + # is_compatible = False + # else: + # rich_table.add_row("✅", str(lhs), "Missing") + # continue + + # if lhs.required and not rhs.required: + # rich_table.add_row("❌", str(lhs), "Missing") + # is_compatible = False + + # if lhs.field_type == rhs.field_type: + # rich_table.add_row("✅", str(lhs), str(rhs)) + # continue + # elif any( + # (isinstance(lhs.field_type, container_type) and isinstance(rhs.field_type, container_type)) + # for container_type in {StructType, MapType, ListType} + # ): + # rich_table.add_row("✅", str(lhs), str(rhs)) + # continue + # else: + # try: + # promote(rhs.field_type, lhs.field_type) + # rich_table.add_row("✅", str(lhs), str(rhs)) + # except ResolveError: + # rich_table.add_row("❌", str(lhs), str(rhs)) + # is_compatible = False + + # if not is_compatible: + # console.print(rich_table) + # raise ValueError(f"Mismatch in fields:\n{console.export_text()}") + + +class _SchemaCompatibilityVisitor(SchemaVisitor[bool]): Review Comment: Yes, I think pre-order makes sense here 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org