jayceslesar commented on code in PR #1938: URL: https://github.com/apache/iceberg-python/pull/1938#discussion_r2083253070
########## pyiceberg/table/update/validate.py: ########## @@ -69,3 +75,74 @@ def validation_history( raise ValidationException("No matching snapshot found.") return manifests_files, snapshots + + +def deleted_data_files( + table: Table, + starting_snapshot: Snapshot, + data_filter: Optional[BooleanExpression], + parent_snapshot: Optional[Snapshot], + partition_set: Optional[set[Record]], +) -> Iterator[ManifestEntry]: + """Find deleted data files matching a filter since a starting snapshot. + + Args: + table: Table to validate + starting_snapshot: Snapshot current at the start of the operation + data_filter: Expression used to find deleted data files + partition_set: a set of partitions to find deleted data files + parent_snapshot: Ending snapshot on the branch being validated + + Returns: + List of deleted data files matching the filter + """ + # if there is no current table state, no files have been deleted + if parent_snapshot is None: + return + + manifests, snapshot_ids = validation_history( + table, + starting_snapshot, + parent_snapshot, + VALIDATE_DATA_FILES_EXIST_OPERATIONS, + ManifestContent.DATA, + ) + + if data_filter is not None: + evaluator = _StrictMetricsEvaluator(table.schema(), data_filter).eval + + for manifest in manifests: + for entry in manifest.fetch_manifest_entry(table.io, discard_deleted=False): + if entry.snapshot_id not in snapshot_ids: + continue + + if entry.status != ManifestEntryStatus.DELETED: + continue + + if data_filter is not None and not evaluator(entry.data_file): Review Comment: believe this has been fixed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org