aokolnychyi commented on code in PR #11131: URL: https://github.com/apache/iceberg/pull/11131#discussion_r1851138290
########## core/src/main/java/org/apache/iceberg/ManifestFilterManager.java: ########## @@ -327,62 +354,71 @@ private ManifestFile filterManifest(Schema tableSchema, ManifestFile manifest) { // this assumes that the manifest doesn't have files to remove and streams through the // manifest without copying data. if a manifest does have a file to remove, this will break // out of the loop and move on to filtering the manifest. - boolean hasDeletedFiles = manifestHasDeletedFiles(evaluator, reader); - if (!hasDeletedFiles) { + if (manifestHasDeletedFiles(evaluator, reader)) { + return filterManifestWithDeletedFiles(evaluator, manifest, reader); + } else { filteredManifests.put(manifest, manifest); return manifest; } - - return filterManifestWithDeletedFiles(evaluator, manifest, reader); - } catch (IOException e) { throw new RuntimeIOException(e, "Failed to close manifest: %s", manifest); } } - private boolean canContainDeletedFiles(ManifestFile manifest) { - boolean canContainExpressionDeletes; + private boolean canContainDeletedFiles(ManifestFile manifest, boolean trustManifestReferences) { + if (hasNoLiveFiles(manifest)) { + return false; + } + + if (trustManifestReferences) { + return manifestsReferencedForDeletes.contains(manifest.path()); + } + + return canContainDroppedFiles(manifest) + || canContainExpressionDeletes(manifest) + || canContainDroppedPartitions(manifest); + } + + private boolean hasNoLiveFiles(ManifestFile manifest) { + return !manifest.hasAddedFiles() && !manifest.hasExistingFiles(); + } + + private boolean canContainExpressionDeletes(ManifestFile manifest) { if (deleteExpression != null && deleteExpression != Expressions.alwaysFalse()) { ManifestEvaluator manifestEvaluator = ManifestEvaluator.forRowFilter( deleteExpression, specsById.get(manifest.partitionSpecId()), caseSensitive); - canContainExpressionDeletes = manifestEvaluator.eval(manifest); - } else { - canContainExpressionDeletes = false; + return manifestEvaluator.eval(manifest); } - boolean canContainDroppedPartitions; + return false; + } + + private boolean canContainDroppedPartitions(ManifestFile manifest) { if (!dropPartitions.isEmpty()) { - canContainDroppedPartitions = - ManifestFileUtil.canContainAny(manifest, dropPartitions, specsById); - } else { - canContainDroppedPartitions = false; + return ManifestFileUtil.canContainAny(manifest, dropPartitions, specsById); } - boolean canContainDroppedFiles; + return false; + } + + private boolean canContainDroppedFiles(ManifestFile manifest) { if (!deletePaths.isEmpty()) { - canContainDroppedFiles = true; + return true; } else if (!deleteFiles.isEmpty()) { - // because there were no path-only deletes, the set of deleted file partitions is valid - canContainDroppedFiles = - ManifestFileUtil.canContainAny(manifest, deleteFilePartitions, specsById); - } else { - canContainDroppedFiles = false; + return ManifestFileUtil.canContainAny(manifest, deleteFilePartitions, specsById); } - boolean canContainDropBySeq = - manifest.content() == ManifestContent.DELETES - && manifest.minSequenceNumber() < minSequenceNumber; - - return canContainExpressionDeletes - || canContainDroppedPartitions - || canContainDroppedFiles - || canContainDropBySeq; + return false; } @SuppressWarnings({"CollectionUndefinedEquality", "checkstyle:CyclomaticComplexity"}) private boolean manifestHasDeletedFiles( PartitionAndMetricsEvaluator evaluator, ManifestReader<F> reader) { + if (manifestsReferencedForDeletes.contains(reader.file().location())) { Review Comment: It was my bad to suggest `reader.file().location()`. It will be fragile as the location may undergo some validation or parsing in `FileIO`, which we don't control here. Probably better to pass `ManifestFile` to this method and simply use `manifest.path()`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org