nastra commented on code in PR #9230: URL: https://github.com/apache/iceberg/pull/9230#discussion_r1418954500
########## core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java: ########## @@ -895,7 +895,7 @@ private void cleanUncommittedAppends(Set<ManifestFile> committed) { } } - this.cachedNewDataManifests = committedNewDataManifests; + this.cachedNewDataManifests = null; Review Comment: rather than setting this to `null`, I think a better approach would be to handle this exactly like it's done for `cachedNewDeleteManifests`, where `cachedNewDeleteManifests` is initialized to be a linked list and then is iterated over and files are removed. I did a quick check with ``` --- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java @@ -94,7 +94,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> { private PartitionSpec dataSpec; // cache new data manifests after writing - private List<ManifestFile> cachedNewDataManifests = null; + private final List<ManifestFile> cachedNewDataManifests = Lists.newLinkedList(); private boolean hasNewDataFiles = false; // cache new manifests for delete files @@ -885,17 +885,15 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> { } private void cleanUncommittedAppends(Set<ManifestFile> committed) { - if (cachedNewDataManifests != null) { - List<ManifestFile> committedNewDataManifests = Lists.newArrayList(); - for (ManifestFile manifest : cachedNewDataManifests) { - if (committed.contains(manifest)) { - committedNewDataManifests.add(manifest); - } else { - deleteFile(manifest.path()); + if (!cachedNewDataManifests.isEmpty()) { + ListIterator<ManifestFile> dataManifestsIterator = cachedNewDataManifests.listIterator(); + while (dataManifestsIterator.hasNext()) { + ManifestFile dataManifest = dataManifestsIterator.next(); + if (!committed.contains(dataManifest)) { + deleteFile(dataManifest.path()); + dataManifestsIterator.remove(); } } - - this.cachedNewDataManifests = null; } ListIterator<ManifestFile> deleteManifestsIterator = cachedNewDeleteManifests.listIterator(); @@ -950,12 +948,12 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> { } private List<ManifestFile> newDataFilesAsManifests() { - if (hasNewDataFiles && cachedNewDataManifests != null) { + if (hasNewDataFiles && !cachedNewDataManifests.isEmpty()) { cachedNewDataManifests.forEach(file -> deleteFile(file.path())); - cachedNewDataManifests = null; + cachedNewDataManifests.clear(); } - if (cachedNewDataManifests == null) { + if (cachedNewDataManifests.isEmpty()) { try { RollingManifestWriter<DataFile> writer = newRollingManifestWriter(dataSpec()); try { @@ -968,7 +966,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> { writer.close(); } - this.cachedNewDataManifests = writer.toManifestFiles(); + this.cachedNewDataManifests.addAll(writer.toManifestFiles()); ``` and that seemed to work. We would probably want to use the same approach in `FastAppend` (but I haven't checked that part for `FastAppend`) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org