Fokko commented on code in PR #1561:
URL: https://github.com/apache/iceberg-python/pull/1561#discussion_r1939206950


##########
pyiceberg/table/update/__init__.py:
##########
@@ -455,6 +456,57 @@ def _(update: SetSnapshotRefUpdate, base_metadata: 
TableMetadata, context: _Tabl
     return base_metadata.model_copy(update=metadata_updates)
 
 
+@_apply_table_update.register(RemoveSnapshotsUpdate)
+def _(update: RemoveSnapshotsUpdate, base_metadata: TableMetadata, context: 
_TableMetadataUpdateContext) -> TableMetadata:
+    for remove_snapshot_id in update.snapshot_ids:
+        if not any(s.snapshot_id == remove_snapshot_id for s in 
base_metadata.snapshots):
+            raise ValueError(f"Snapshot with snapshot id {remove_snapshot_id} 
does not exist: {base_metadata.snapshots}")
+
+    snapshots = [
+        (s.model_copy(update={"parent_snapshot_id": None}) if 
s.parent_snapshot_id in update.snapshot_ids else s)
+        for s in base_metadata.snapshots

Review Comment:
   Can we make this a little more verbose:
   ```suggestion
           for snapshot in base_metadata.snapshots
   ```



##########
pyiceberg/table/update/__init__.py:
##########
@@ -455,6 +456,61 @@ def _(update: SetSnapshotRefUpdate, base_metadata: 
TableMetadata, context: _Tabl
     return base_metadata.model_copy(update=metadata_updates)
 
 
+@_apply_table_update.register(RemoveSnapshotsUpdate)
+def _(update: RemoveSnapshotsUpdate, base_metadata: TableMetadata, context: 
_TableMetadataUpdateContext) -> TableMetadata:
+    for remove_snapshot_id in update.snapshot_ids:
+        if remove_snapshot_id == base_metadata.current_snapshot_id:
+            raise ValueError(f"Can't remove current snapshot id 
{remove_snapshot_id}")
+        if not any(s.snapshot_id == remove_snapshot_id for s in 
base_metadata.snapshots):
+            raise ValueError(f"Snapshot with snapshot id {remove_snapshot_id} 
does not exist: {base_metadata.snapshots}")
+
+    snapshots = [
+        (s.model_copy(update={"parent_snapshot_id": None}) if 
s.parent_snapshot_id in update.snapshot_ids else s)
+        for s in base_metadata.snapshots
+        if s.snapshot_id not in update.snapshot_ids
+    ]
+    snapshot_log = [
+        snapshot_log_entry
+        for snapshot_log_entry in base_metadata.snapshot_log
+        if snapshot_log_entry.snapshot_id not in update.snapshot_ids
+    ]
+
+    remove_ref_updates = (
+        RemoveSnapshotRefUpdate(ref_name=ref_name)
+        for ref_name, ref in base_metadata.refs.items()
+        if ref.snapshot_id in update.snapshot_ids
+    )
+    remove_statistics_updates = (
+        RemoveStatisticsUpdate(statistics_file.snapshot_id)
+        for statistics_file in base_metadata.statistics
+        if statistics_file.snapshot_id in update.snapshot_ids
+    )
+    updates = itertools.chain(remove_ref_updates, remove_statistics_updates)
+    new_metadata = base_metadata
+    for upd in updates:
+        new_metadata = _apply_table_update(upd, new_metadata, context)

Review Comment:
   This can potentially be pretty expensive, but I like the solution here 
actually.



##########
pyiceberg/table/update/__init__.py:
##########
@@ -455,6 +456,57 @@ def _(update: SetSnapshotRefUpdate, base_metadata: 
TableMetadata, context: _Tabl
     return base_metadata.model_copy(update=metadata_updates)
 
 
+@_apply_table_update.register(RemoveSnapshotsUpdate)
+def _(update: RemoveSnapshotsUpdate, base_metadata: TableMetadata, context: 
_TableMetadataUpdateContext) -> TableMetadata:
+    for remove_snapshot_id in update.snapshot_ids:
+        if not any(s.snapshot_id == remove_snapshot_id for s in 
base_metadata.snapshots):
+            raise ValueError(f"Snapshot with snapshot id {remove_snapshot_id} 
does not exist: {base_metadata.snapshots}")
+
+    snapshots = [
+        (s.model_copy(update={"parent_snapshot_id": None}) if 
s.parent_snapshot_id in update.snapshot_ids else s)
+        for s in base_metadata.snapshots
+        if s.snapshot_id not in update.snapshot_ids
+    ]
+    snapshot_log = [
+        snapshot_log_entry
+        for snapshot_log_entry in base_metadata.snapshot_log
+        if snapshot_log_entry.snapshot_id not in update.snapshot_ids
+    ]
+
+    remove_ref_updates = (
+        RemoveSnapshotRefUpdate(ref_name=ref_name)
+        for ref_name, ref in base_metadata.refs.items()
+        if ref.snapshot_id in update.snapshot_ids
+    )
+    remove_statistics_updates = (
+        RemoveStatisticsUpdate(statistics_file.snapshot_id)
+        for statistics_file in base_metadata.statistics
+        if statistics_file.snapshot_id in update.snapshot_ids
+    )
+    updates = itertools.chain(remove_ref_updates, remove_statistics_updates)
+    new_metadata = base_metadata
+    for upd in updates:
+        new_metadata = _apply_table_update(upd, new_metadata, context)
+
+    context.add_update(update)
+    return new_metadata.model_copy(update={"snapshots": snapshots, 
"snapshot_log": snapshot_log})
+
+
+@_apply_table_update.register(RemoveSnapshotRefUpdate)
+def _(update: RemoveSnapshotRefUpdate, base_metadata: TableMetadata, context: 
_TableMetadataUpdateContext) -> TableMetadata:

Review Comment:
   Let's remove this once 
https://github.com/apache/iceberg-python/pull/1598/files is in :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to