nastra commented on code in PR #9230:
URL: https://github.com/apache/iceberg/pull/9230#discussion_r1418954500
##########
core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java:
##########
@@ -895,7 +895,7 @@ private void cleanUncommittedAppends(Set<ManifestFile>
committed) {
}
}
- this.cachedNewDataManifests = committedNewDataManifests;
+ this.cachedNewDataManifests = null;
Review Comment:
rather than setting this to `null`, I think a better approach would be to
handle this exactly like it's done for `cachedNewDeleteManifests`, where
`cachedNewDeleteManifests` is initialized to be a linked list and then is
iterated over and files are removed.
I did a quick check with
```
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -94,7 +94,7 @@ abstract class MergingSnapshotProducer<ThisT> extends
SnapshotProducer<ThisT> {
private PartitionSpec dataSpec;
// cache new data manifests after writing
- private List<ManifestFile> cachedNewDataManifests = null;
+ private final List<ManifestFile> cachedNewDataManifests =
Lists.newLinkedList();
private boolean hasNewDataFiles = false;
// cache new manifests for delete files
@@ -885,17 +885,15 @@ abstract class MergingSnapshotProducer<ThisT> extends
SnapshotProducer<ThisT> {
}
private void cleanUncommittedAppends(Set<ManifestFile> committed) {
- if (cachedNewDataManifests != null) {
- List<ManifestFile> committedNewDataManifests = Lists.newArrayList();
- for (ManifestFile manifest : cachedNewDataManifests) {
- if (committed.contains(manifest)) {
- committedNewDataManifests.add(manifest);
- } else {
- deleteFile(manifest.path());
+ if (!cachedNewDataManifests.isEmpty()) {
+ ListIterator<ManifestFile> dataManifestsIterator =
cachedNewDataManifests.listIterator();
+ while (dataManifestsIterator.hasNext()) {
+ ManifestFile dataManifest = dataManifestsIterator.next();
+ if (!committed.contains(dataManifest)) {
+ deleteFile(dataManifest.path());
+ dataManifestsIterator.remove();
}
}
-
- this.cachedNewDataManifests = null;
}
ListIterator<ManifestFile> deleteManifestsIterator =
cachedNewDeleteManifests.listIterator();
@@ -950,12 +948,12 @@ abstract class MergingSnapshotProducer<ThisT> extends
SnapshotProducer<ThisT> {
}
private List<ManifestFile> newDataFilesAsManifests() {
- if (hasNewDataFiles && cachedNewDataManifests != null) {
+ if (hasNewDataFiles && !cachedNewDataManifests.isEmpty()) {
cachedNewDataManifests.forEach(file -> deleteFile(file.path()));
- cachedNewDataManifests = null;
+ cachedNewDataManifests.clear();
}
- if (cachedNewDataManifests == null) {
+ if (cachedNewDataManifests.isEmpty()) {
try {
RollingManifestWriter<DataFile> writer =
newRollingManifestWriter(dataSpec());
try {
@@ -968,7 +966,7 @@ abstract class MergingSnapshotProducer<ThisT> extends
SnapshotProducer<ThisT> {
writer.close();
}
- this.cachedNewDataManifests = writer.toManifestFiles();
+ this.cachedNewDataManifests.addAll(writer.toManifestFiles());
```
and that seemed to work. We would probably want to use the same approach in
`FastAppend` (but I haven't checked that part for `FastAppend`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]