Fokko commented on code in PR #728: URL: https://github.com/apache/iceberg-python/pull/728#discussion_r1623640157
########## pyiceberg/table/__init__.py: ########## @@ -340,6 +341,86 @@ def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> updates = properties or kwargs return self._apply((SetPropertiesUpdate(updates=updates),)) + @deprecated( Review Comment: Are we adding new methods and deprecating them right away? ########## pyiceberg/table/__init__.py: ########## @@ -1806,6 +1891,85 @@ def __enter__(self) -> U: return self # type: ignore +class ManageSnapshots(UpdateTableMetadata["ManageSnapshots"]): + """Run snapshot management operations using APIs.""" + + _updates: Tuple[TableUpdate, ...] = () + _requirements: Tuple[TableRequirement, ...] = () + _parent_snapshot_id: Optional[int] + + def _commit(self) -> UpdatesAndRequirements: + """Apply the pending changes and commit.""" + if self._updates: + self._requirements += ( + AssertRefSnapshotId(snapshot_id=self._parent_snapshot_id, ref="main"), + AssertTableUUID(uuid=self._transaction.table_metadata.table_uuid), + ) + return self._updates, self._requirements + + def create_tag(self, snapshot_id: int, tag_name: str, max_ref_age_ms: Optional[int] = None) -> ManageSnapshots: + """ Review Comment: We try to stick to the Google style docstrings: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html ########## pyiceberg/table/__init__.py: ########## @@ -1806,6 +1891,85 @@ def __enter__(self) -> U: return self # type: ignore +class ManageSnapshots(UpdateTableMetadata["ManageSnapshots"]): Review Comment: Can you add docs so folks know how to use this? 
########## pyiceberg/table/__init__.py: ########## @@ -340,6 +341,86 @@ def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> updates = properties or kwargs return self._apply((SetPropertiesUpdate(updates=updates),)) + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def add_snapshot(self, snapshot: Snapshot) -> Transaction: + """Add a new snapshot to the table. + + Returns: + The transaction with the add-snapshot staged. + """ + updates = (AddSnapshotUpdate(snapshot=snapshot),) + requirements = (AssertTableUUID(uuid=self._table.metadata.table_uuid),) + + return self._apply(updates, requirements) + + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def set_ref_snapshot( + self, + snapshot_id: int, + parent_snapshot_id: Optional[int], + ref_name: str, + type: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> Transaction: + """Update a ref to a snapshot. + + Returns: + The transaction with the set-snapshot-ref staged + """ + updates = ( + SetSnapshotRefUpdate( + snapshot_id=snapshot_id, + ref_name=ref_name, + type=type, + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ), + ) + + requirements = ( + AssertRefSnapshotId(snapshot_id=parent_snapshot_id, ref="main"), + AssertTableUUID(uuid=self.table_metadata.table_uuid), Review Comment: I think the `AssertTableUUID` is a very coarse way of checking if there is a conflict. The UUID is unique per commit, but there are many situations where the commits can be de-conflicted. For example, if one commit sets table properties and another updates a branch, the two changes do not actually conflict. I noticed that in the PyIceberg code, and also in the open-api spec, the `type` is not properly defined. 
I took the liberty of creating a PR to fix that on the Iceberg side: https://github.com/apache/iceberg/pull/10423/ ########## pyiceberg/table/__init__.py: ########## @@ -340,6 +341,86 @@ def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> updates = properties or kwargs return self._apply((SetPropertiesUpdate(updates=updates),)) + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def add_snapshot(self, snapshot: Snapshot) -> Transaction: + """Add a new snapshot to the table. + + Returns: + The transaction with the add-snapshot staged. + """ + updates = (AddSnapshotUpdate(snapshot=snapshot),) + requirements = (AssertTableUUID(uuid=self._table.metadata.table_uuid),) + + return self._apply(updates, requirements) + + @deprecated( + deprecated_in="0.7.0", + removed_in="0.8.0", + help_message="Please use one of the functions in ManageSnapshots instead", + ) + def set_ref_snapshot( + self, + snapshot_id: int, + parent_snapshot_id: Optional[int], + ref_name: str, + type: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> Transaction: + """Update a ref to a snapshot. 
+ + Returns: + The transaction with the set-snapshot-ref staged + """ + updates = ( + SetSnapshotRefUpdate( + snapshot_id=snapshot_id, + ref_name=ref_name, + type=type, + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ), + ) + + requirements = ( + AssertRefSnapshotId(snapshot_id=parent_snapshot_id, ref="main"), + AssertTableUUID(uuid=self.table_metadata.table_uuid), + ) + return self._apply(updates, requirements) + + def _set_ref_snapshot( + self, + snapshot_id: int, + ref_name: str, + type: str, + max_ref_age_ms: Optional[int] = None, + max_snapshot_age_ms: Optional[int] = None, + min_snapshots_to_keep: Optional[int] = None, + ) -> UpdatesAndRequirements: + """Update a ref to a snapshot. + + Returns: + The updates and requirements for the set-snapshot-ref staged + """ + updates = ( + SetSnapshotRefUpdate( + snapshot_id=snapshot_id, + ref_name=ref_name, + type=type, + max_ref_age_ms=max_ref_age_ms, + max_snapshot_age_ms=max_snapshot_age_ms, + min_snapshots_to_keep=min_snapshots_to_keep, + ), + ) + + return updates, () Review Comment: I think we want to require here as well that the ref didn't exist, or that the ref is at a certain snapshot. ########## pyiceberg/table/__init__.py: ########## @@ -1806,6 +1891,85 @@ def __enter__(self) -> U: return self # type: ignore +class ManageSnapshots(UpdateTableMetadata["ManageSnapshots"]): + """Run snapshot management operations using APIs.""" + + _updates: Tuple[TableUpdate, ...] = () + _requirements: Tuple[TableRequirement, ...] 
= () + _parent_snapshot_id: Optional[int] + + def _commit(self) -> UpdatesAndRequirements: + """Apply the pending changes and commit.""" + if self._updates: + self._requirements += ( + AssertRefSnapshotId(snapshot_id=self._parent_snapshot_id, ref="main"), + AssertTableUUID(uuid=self._transaction.table_metadata.table_uuid), + ) + return self._updates, self._requirements + + def create_tag(self, snapshot_id: int, tag_name: str, max_ref_age_ms: Optional[int] = None) -> ManageSnapshots: + """ + Create a tag at a snapshot. + + Args: + :param snapshot_id: snapshot id of the existing snapshot to tag Review Comment: ```suggestion snapshot_id (int): snapshot id of the existing snapshot to tag ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org