Fokko commented on code in PR #921: URL: https://github.com/apache/iceberg-python/pull/921#discussion_r1678314538
########## pyiceberg/schema.py: ########## @@ -1616,3 +1616,103 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType: return read_type else: raise ResolveError(f"Cannot promote {file_type} to {read_type}") + + +def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None: + """ + Check if the `provided_schema` is compatible with `requested_schema`. + + Both Schemas must have valid IDs and share the same ID for the same field names. + + Two schemas are considered compatible when: + 1. All `required` fields in `requested_schema` are present and are also `required` in the `provided_schema` + 2. Field Types are consistent for fields that are present in both schemas. I.e. the field type + in the `provided_schema` can be promoted to the field type of the same field ID in `requested_schema` + + Raises: + ValueError: If the schemas are not compatible. + """ + pre_order_visit(requested_schema, _SchemaCompatibilityVisitor(provided_schema)) + + +class _SchemaCompatibilityVisitor(PreOrderSchemaVisitor[bool]): + provided_schema: Schema + + def __init__(self, provided_schema: Schema): + from rich.console import Console + from rich.table import Table as RichTable + + self.provided_schema = provided_schema + self.rich_table = RichTable(show_header=True, header_style="bold") + self.rich_table.add_column("") + self.rich_table.add_column("Table field") + self.rich_table.add_column("Dataframe field") + self.console = Console(record=True) + + def _is_field_compatible(self, lhs: NestedField) -> bool: + # Validate nullability first. + # An optional field can be missing in the provided schema + # But a required field must exist as a required field + try: + rhs = self.provided_schema.find_field(lhs.field_id) + except ValueError: + if lhs.required: + self.rich_table.add_row("❌", str(lhs), "Missing") + return False + else: + self.rich_table.add_row("✅", str(lhs), "Missing") + return True + + if lhs.required and not rhs.required: + self.rich_table.add_row("❌", str(lhs), str(rhs)) + return False + + # Check type compatibility + if lhs.field_type == rhs.field_type: + self.rich_table.add_row("✅", str(lhs), str(rhs)) + return True + # We only check that the parent node is also of the same type. + # We check the type of the child nodes when we traverse them later. + elif any( + (isinstance(lhs.field_type, container_type) and isinstance(rhs.field_type, container_type)) + for container_type in {StructType, MapType, ListType} + ): Review Comment: Ah, yes I see -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org