Fokko commented on code in PR #441: URL: https://github.com/apache/iceberg-python/pull/441#discussion_r1493898782
########## pyiceberg/table/__init__.py: ########## @@ -1932,6 +1928,13 @@ def commit(self) -> None: else: updates = (SetCurrentSchemaUpdate(schema_id=existing_schema_id),) # type: ignore + if name_mapping := self._table.name_mapping(): + updated_name_mapping = update_mapping(name_mapping, self._updates, self._adds) + updates += ( # type: ignore + SetPropertiesUpdate(updates={TableProperties.DEFAULT_NAME_MAPPING: updated_name_mapping.model_dump_json()}), + ) + print(f"DEBUG: {updates=}") Review Comment: :) ```suggestion ``` ########## tests/table/test_name_mapping.py: ########## @@ -238,3 +245,67 @@ def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: with pytest.raises(ValueError, match="Could not find field with name: boom"): table_name_mapping_nested.find("boom") + + +def test_invalid_mapped_field() -> None: + with pytest.raises(ValueError): + MappedField(field_id=1, names=[]) + + +def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: + assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested + + +def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: + updates = {1: NestedField(1, "foo_update", StringType(), True)} + adds = { + -1: [NestedField(18, "add_18", StringType(), True)], + 15: [NestedField(19, "name", StringType(), True), NestedField(20, "add_20", StringType(), True)], + } Review Comment: Should we also have checks in place when you add an existing ID? 
########## pyiceberg/table/name_mapping.py: ########## @@ -175,5 +188,72 @@ def primitive(self, primitive: PrimitiveType) -> List[MappedField]: return [] +class _UpdateMapping(NameMappingVisitor[List[MappedField], MappedField]): + _updates: Dict[int, NestedField] + _adds: Dict[int, List[NestedField]] + + def __init__(self, updates: Dict[int, NestedField], adds: Dict[int, List[NestedField]]): + self._updates = updates + self._adds = adds + + @staticmethod + def _remove_reassigned_names(field: MappedField, assignments: Dict[str, int]) -> Optional[MappedField]: + removed_names = set() + for name in field.names: + if (assigned_id := assignments.get(name)) and assigned_id != field.field_id: + removed_names.add(name) + + remaining_names = [f for f in field.names if f not in removed_names] + if remaining_names: + return MappedField(field_id=field.field_id, names=remaining_names, fields=field.fields) + else: + return None + + def _add_new_fields(self, mapped_fields: List[MappedField], parent_id: int) -> List[MappedField]: + if fields_to_add := self._adds.get(parent_id): + fields: List[MappedField] = [] + new_fields: List[MappedField] = [] + + for add in fields_to_add: + new_fields.append( + MappedField(field_id=add.field_id, names=[add.name], fields=visit(add.field_type, _CreateMapping())) + ) + + reassignments = {f.name: f.field_id for f in fields_to_add} + fields = [ + updated_field + for field in mapped_fields + if (updated_field := self._remove_reassigned_names(field, reassignments)) is not None + ] + fields.extend(new_fields) Review Comment: Style nit: ```suggestion fields = [ updated_field for field in mapped_fields if (updated_field := self._remove_reassigned_names(field, reassignments)) is not None ] + new_fields ``` ########## tests/table/test_name_mapping.py: ########## @@ -238,3 +245,67 @@ def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: with pytest.raises(ValueError, match="Could not find field with name: boom"): 
table_name_mapping_nested.find("boom") + + +def test_invalid_mapped_field() -> None: + with pytest.raises(ValueError): + MappedField(field_id=1, names=[]) + + +def test_update_mapping_no_updates_or_adds(table_name_mapping_nested: NameMapping) -> None: + assert update_mapping(table_name_mapping_nested, {}, {}) == table_name_mapping_nested + + +def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: + updates = {1: NestedField(1, "foo_update", StringType(), True)} + adds = { + -1: [NestedField(18, "add_18", StringType(), True)], + 15: [NestedField(19, "name", StringType(), True), NestedField(20, "add_20", StringType(), True)], + } Review Comment: Or when you reassign an existing name? ########## pyiceberg/table/name_mapping.py: ########## @@ -45,6 +45,18 @@ class MappedField(IcebergBaseModel): def convert_null_to_empty_List(cls, v: Any) -> Any: return v or [] + @field_validator('names', mode='before') Review Comment: Typically we want to run these kinds of checks in `after` mode, so that all the other checks (such as the type validation) have already been done. For example, in `before` mode this validator will also run if you pass in a `str` as `names`, and then it will give a less obvious error than the type check would. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org