sungwy commented on code in PR #1083: URL: https://github.com/apache/iceberg-python/pull/1083#discussion_r1727268386
########## pyproject.toml: ########## @@ -16,7 +16,7 @@ # under the License. [tool.poetry] name = "pyiceberg" -version = "0.7.1" +version = "0.7.2" Review Comment: We will update this version when and if we release 0.7.2 ```suggestion version = "0.7.1" ``` ########## tests/table/test_init.py: ########## @@ -512,6 +512,29 @@ def test_add_column(table_v2: Table) -> None: assert apply_schema.highest_field_id == 4 +def test_update_column_doc(table_v1: Table, table_v2: Table) -> None: + COMMENT2 = "comment2" + for table in [table_v1, table_v2]: + original_schema = table.schema() + # update existing doc to a new doc + assert original_schema.find_field("y").doc == "comment" + new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply() + assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc" + + # update existing doc to an emtpy string + assert new_schema.find_field("y").doc == COMMENT2 + new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply() + assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc" + + # assert the above two updates also works with union_by_name + assert ( + table.update_schema().union_by_name(new_schema)._apply() == new_schema + ), "failed to update existing field doc with union_by_name" + assert ( + table.update_schema().union_by_name(new_schema2)._apply() == new_schema2 + ), "failed to remove existing field doc with union_by_name" + + Review Comment: Could we add a case to verify your fix for the `required` field attribute as well? ########## pyiceberg/table/__init__.py: ########## @@ -2492,21 +2492,22 @@ def update_column( except ResolveError as e: raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e + # if other updates for the same field exist in one transaction: if updated := self._updates.get(field.field_id): self._updates[field.field_id] = NestedField( field_id=updated.field_id, name=updated.name, field_type=field_type or updated.field_type, - doc=doc or updated.doc, - required=updated.required, + doc=doc if doc is not None else updated.doc, + required=required or updated.required, Review Comment: 😱 This looks correct to me. Thank you for the fix! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org