kevinjqliu commented on code in PR #2143:
URL: https://github.com/apache/iceberg-python/pull/2143#discussion_r2265420806
##########
pyiceberg/table/update/snapshot.py:
##########
@@ -920,87 +919,49 @@ class
ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]):
_requirements: Tuple[TableRequirement, ...] = ()
def _commit(self) -> UpdatesAndRequirements:
- """
- Commit the staged updates and requirements.
+ """Commit the staged updates and requirements.
- This will remove the snapshots with the given IDs, but will always
skip protected snapshots (branch/tag heads).
+ This will remove the snapshots with the given IDs.
Returns:
Tuple of updates and requirements to be committed,
as required by the calling parent apply functions.
"""
- # Remove any protected snapshot IDs from the set to expire, just in
case
- protected_ids = self._get_protected_snapshot_ids()
- self._snapshot_ids_to_expire -= protected_ids
update =
RemoveSnapshotsUpdate(snapshot_ids=self._snapshot_ids_to_expire)
- self._updates += (update,)
- return self._updates, self._requirements
Review Comment:
i think we should keep the original implementation for this function
##########
mkdocs/docs/api.md:
##########
@@ -1287,6 +1334,67 @@ with table.manage_snapshots() as ms:
ms.create_branch(snapshot_id1, "Branch_A").create_tag(snapshot_id2,
"tag789")
```
+## Table Maintenance
Review Comment:
this section is great. lets keep this. can we revert all the other unrelated
changes?
##########
pyiceberg/table/inspect.py:
##########
Review Comment:
```
tests/integration/test_inspect_table.py:984: error: "InspectTable" has no
attribute "all_files"; maybe "_files"? [attr-defined]
```
is due to the changes in this PR, because the `_all_files` function is
removed.
you can run `git checkout main pyiceberg/table/inspect.py` to put it back
and then commit
##########
pyiceberg/table/update/snapshot.py:
##########
@@ -920,87 +919,49 @@ class
ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]):
_requirements: Tuple[TableRequirement, ...] = ()
def _commit(self) -> UpdatesAndRequirements:
- """
- Commit the staged updates and requirements.
+ """Commit the staged updates and requirements.
- This will remove the snapshots with the given IDs, but will always
skip protected snapshots (branch/tag heads).
+ This will remove the snapshots with the given IDs.
Returns:
Tuple of updates and requirements to be committed,
as required by the calling parent apply functions.
"""
- # Remove any protected snapshot IDs from the set to expire, just in
case
- protected_ids = self._get_protected_snapshot_ids()
- self._snapshot_ids_to_expire -= protected_ids
update =
RemoveSnapshotsUpdate(snapshot_ids=self._snapshot_ids_to_expire)
- self._updates += (update,)
- return self._updates, self._requirements
+ return (update,), ()
def _get_protected_snapshot_ids(self) -> Set[int]:
- """
- Get the IDs of protected snapshots.
+ """Get the IDs of protected snapshots.
- These are the HEAD snapshots of all branches and all tagged snapshots.
These ids are to be excluded from expiration.
+ These are the HEAD snapshots of all branches and all tagged snapshots.
+ These ids are to be excluded from expiration.
Returns:
Set of protected snapshot IDs to exclude from expiration.
"""
- protected_ids: Set[int] = set()
+ protected_ids = set()
for ref in self._transaction.table_metadata.refs.values():
if ref.snapshot_ref_type in [SnapshotRefType.TAG,
SnapshotRefType.BRANCH]:
protected_ids.add(ref.snapshot_id)
return protected_ids
- def expire_snapshot_by_id(self, snapshot_id: int) -> ExpireSnapshots:
- """
- Expire a snapshot by its ID.
-
- This will mark the snapshot for expiration.
+ def by_id(self, snapshot_id: int) -> ExpireSnapshots:
+ """Expire a snapshot by its ID.
Args:
snapshot_id (int): The ID of the snapshot to expire.
+
Returns:
This for method chaining.
"""
if self._transaction.table_metadata.snapshot_by_id(snapshot_id) is
None:
raise ValueError(f"Snapshot with ID {snapshot_id} does not exist.")
- if snapshot_id in self._get_protected_snapshot_ids():
- raise ValueError(f"Snapshot with ID {snapshot_id} is protected and
cannot be expired.")
+ protected_ids = self._get_protected_snapshot_ids()
+ if snapshot_id in protected_ids:
+ raise ValueError(f"Cannot expire snapshot {snapshot_id} as it is
referenced by a branch or tag.")
self._snapshot_ids_to_expire.add(snapshot_id)
-
- return self
-
- def expire_snapshots_by_ids(self, snapshot_ids: List[int]) ->
"ExpireSnapshots":
- """
- Expire multiple snapshots by their IDs.
-
- This will mark the snapshots for expiration.
-
- Args:
- snapshot_ids (List[int]): List of snapshot IDs to expire.
- Returns:
- This for method chaining.
- """
- for snapshot_id in snapshot_ids:
- self.expire_snapshot_by_id(snapshot_id)
- return self
-
- def expire_snapshots_older_than(self, timestamp_ms: int) ->
"ExpireSnapshots":
Review Comment:
maybe keep this as rename as `by_ids` and `older_than`?
##########
tests/table/test_maintenance_api.py:
##########
Review Comment:
this test file isnt really testing the expire snapshots functionality.
wdyt about removing it and reusing the previous
`tests/table/test_expire_snapshots.py` file? we can just change any reference
to `.expire_snapshots()` to `maintenance.expire_snapshots()`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]