syun64 commented on code in PR #433: URL: https://github.com/apache/iceberg-python/pull/433#discussion_r1491658399
########## pyiceberg/schema.py: ########## @@ -1213,34 +1213,50 @@ def build_position_accessors(schema_or_type: Union[Schema, IcebergType]) -> Dict return visit(schema_or_type, _BuildPositionAccessors()) -def assign_fresh_schema_ids(schema_or_type: Union[Schema, IcebergType], next_id: Optional[Callable[[], int]] = None) -> Schema: - """Traverses the schema, and sets new IDs.""" - return pre_order_visit(schema_or_type, _SetFreshIDs(next_id_func=next_id)) +def assign_fresh_schema_ids(schema_or_type: Union[Schema, IcebergType], base_schema: Schema = None, next_id: Optional[Callable[[], int]] = None) -> Schema: + """Traverses the schema and assigns IDs from the base_schema, and fresh IDs from either the next_id function + or monotonically increasing IDs""" + visiting_schema = schema_or_type if isinstance(schema_or_type, Schema) else None + return pre_order_visit(schema_or_type, _SetFreshIDs(visiting_schema=visiting_schema, base_schema=base_schema, next_id_func=next_id)) class _SetFreshIDs(PreOrderSchemaVisitor[IcebergType]): - """Traverses the schema and assigns monotonically increasing ids.""" + """Assigns IDs from the base_schema, and fresh IDs from either the next_id function or monotonically increasing IDs""" - old_id_to_new_id: Dict[int, int] + name_to_id: Dict[str, int] - def __init__(self, next_id_func: Optional[Callable[[], int]] = None) -> None: - self.old_id_to_new_id = {} - counter = itertools.count(1) + def __init__(self, visiting_schema: Optional[Schema] = None, base_schema: Optional[Schema] = None, next_id_func: Optional[Callable[[], int]] = None) -> None: + self.name_to_id = {} + self.visiting_schema: Schema = visiting_schema Review Comment: I think I understand why we are using the visiting_schema, but it still feels counter-intuitive that we are using the visiting schema separately in order to check the name of the field when we are already traversing through the visiting schema itself through this visitor - is there a better way to get the name of 
the field we already have in hand? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org