Fokko commented on code in PR #3287:
URL: https://github.com/apache/iceberg-python/pull/3287#discussion_r3216260972


##########
pyiceberg/table/__init__.py:
##########
@@ -861,6 +861,28 @@ def upsert(
 
         return UpsertResult(rows_updated=update_row_cnt, 
rows_inserted=insert_row_cnt)
 
+    def _find_referenced_data_files(self, file_paths: list[str]) -> list[str]:
+        """Return file_paths already referenced by data files in the current 
snapshot."""
+        snapshot = self.table_metadata.current_snapshot()
+        if snapshot is None:
+            return []
+
+        candidates = set(file_paths)
+        io = self._table.io
+        data_manifests = [m for m in snapshot.manifests(io) if m.content == 
ManifestContent.DATA]
+
+        def path_filter(data_file: DataFile) -> bool:
+            return data_file.file_path in candidates
+
+        executor = ExecutorFactory.get_or_create()
+        entries = chain.from_iterable(
+            executor.map(
+                lambda args: _open_manifest(*args),
+                [(io, manifest, path_filter, lambda _: True) for manifest in 
data_manifests],

Review Comment:
   Nice, this is pretty neat 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to