syun64 commented on code in PR #433:
URL: https://github.com/apache/iceberg-python/pull/433#discussion_r1491658399


##########
pyiceberg/schema.py:
##########
@@ -1213,34 +1213,50 @@ def build_position_accessors(schema_or_type: 
Union[Schema, IcebergType]) -> Dict
     return visit(schema_or_type, _BuildPositionAccessors())
 
 
-def assign_fresh_schema_ids(schema_or_type: Union[Schema, IcebergType], 
next_id: Optional[Callable[[], int]] = None) -> Schema:
-    """Traverses the schema, and sets new IDs."""
-    return pre_order_visit(schema_or_type, _SetFreshIDs(next_id_func=next_id))
+def assign_fresh_schema_ids(schema_or_type: Union[Schema, IcebergType], 
base_schema: Schema = None, next_id: Optional[Callable[[], int]] = None) -> 
Schema:
+    """Traverses the schema and assigns IDs from the base_schema, and fresh 
IDs from either the next_id function 
+    or monotonically increasing IDs"""
+    visiting_schema = schema_or_type if isinstance(schema_or_type, Schema) 
else None
+    return pre_order_visit(schema_or_type, 
_SetFreshIDs(visiting_schema=visiting_schema, base_schema=base_schema, 
next_id_func=next_id))
 
 
 class _SetFreshIDs(PreOrderSchemaVisitor[IcebergType]):
-    """Traverses the schema and assigns monotonically increasing ids."""
+    """Assigns IDs from the base_schema, and fresh IDs from either the next_id 
function or monotonically increasing IDs"""
 
-    old_id_to_new_id: Dict[int, int]
+    name_to_id: Dict[str, int]
 
-    def __init__(self, next_id_func: Optional[Callable[[], int]] = None) -> 
None:
-        self.old_id_to_new_id = {}
-        counter = itertools.count(1)
+    def __init__(self, visiting_schema: Optional[Schema] = None, base_schema: 
Optional[Schema] = None, next_id_func: Optional[Callable[[], int]] = None) -> 
None:
+        self.name_to_id = {}
+        self.visiting_schema: Schema = visiting_schema

Review Comment:
   I think I understand why we are using the visiting_schema, but it still 
feels counter-intuitive that we consult the visiting_schema separately in 
order to look up the name of a field when this visitor is already traversing 
that same schema - is there a better way to get the name of the field we 
already have in hand?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to