Fokko commented on code in PR #8374:
URL: https://github.com/apache/iceberg/pull/8374#discussion_r1306717140
##########
python/pyiceberg/table/__init__.py:
##########
@@ -934,206 +963,611 @@ def case_sensitive(self, case_sensitive: bool) -> UpdateSchema:
return self
def add_column(
- self, name: str, type_var: IcebergType, doc: Optional[str] = None, parent: Optional[str] = None, required: bool = False
+ self, path: Union[str, Tuple[str, ...]], field_type: IcebergType, doc: Optional[str] = None, required: bool = False
) -> UpdateSchema:
"""Add a new column to a nested struct or Add a new top-level column.
Args:
- name: Name for the new column.
- type_var: Type for the new column.
+ path: Name for the new column.
+ field_type: Type for the new column.
doc: Documentation string for the new column.
- parent: Name of the parent struct to the column will be added to.
required: Whether the new column is required.
Returns:
- This for method chaining
+ This for method chaining.
"""
- if "." in name:
- raise ValueError(f"Cannot add column with ambiguous name: {name}")
+ path = (path,) if isinstance(path, str) else path
+
+ if "." in path[-1]:
+ raise ValueError(f"Cannot add column with ambiguous name: {path[-1]}, provide a tuple instead")
if required and not self._allow_incompatible_changes:
# Table format version 1 and 2 cannot add required column because there is no initial value
- raise ValueError(f"Incompatible change: cannot add required column: {name}")
+ raise ValueError(f'Incompatible change: cannot add required column: {".".join(path)}')
+
+ name = path[-1]
+ parent = path[:-1]
+
+ full_name = ".".join(path)
+ parent_id: int = TABLE_ROOT_ID
+
+ if len(parent) > 0:
+ parent_field = self._schema.find_field(".".join(parent), self._case_sensitive)
+ parent_type = parent_field.field_type
+ if isinstance(parent_type, MapType):
+ parent_field = parent_type.value_field
+ elif isinstance(parent_type, ListType):
+ parent_field = parent_type.element_field
+
+ if not parent_field.field_type.is_struct:
+ raise ValueError(f"Cannot add column '{name}' to non-struct type: {'.'.join(parent)}")
+
+ parent_id = parent_field.field_id
+
+ exists = False
+ try:
+ exists = self._schema.find_field(full_name, self._case_sensitive) is not None
+ except ValueError:
+ pass
+
+ if exists:
+ raise ValueError(f"Cannot add column, name already exists: {full_name}")
+
+ # assign new IDs in order
+ new_id = self.assign_new_column_id()
+
+ # update tracking for moves
+ self._added_name_to_id[full_name] = new_id
Review Comment:
Good one, added! The `_id_to_parent` visitor is part of
https://github.com/apache/iceberg/pull/8393
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]