aokolnychyi commented on code in PR #11086: URL: https://github.com/apache/iceberg/pull/11086#discussion_r1757647083
########## core/src/main/java/org/apache/iceberg/SnapshotProducer.java: ########## @@ -554,6 +562,84 @@ protected boolean cleanupAfterCommit() { return true; } + protected List<ManifestFile> writeDataManifests(List<DataFile> files, PartitionSpec spec) { + return writeDataManifests(files, null /* inherit data seq */, spec); + } + + protected List<ManifestFile> writeDataManifests( + List<DataFile> files, Long dataSeq, PartitionSpec spec) { + return writeManifests(files, group -> writeDataFileGroup(group, dataSeq, spec)); + } + + private List<ManifestFile> writeDataFileGroup( + List<DataFile> files, Long dataSeq, PartitionSpec spec) { + RollingManifestWriter<DataFile> writer = newRollingManifestWriter(spec); + + try (RollingManifestWriter<DataFile> closableWriter = writer) { + if (dataSeq != null) { + files.forEach(file -> closableWriter.add(file, dataSeq)); + } else { + files.forEach(closableWriter::add); + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to write data manifests"); + } + + return writer.toManifestFiles(); + } + + protected List<ManifestFile> writeDeleteManifests( + List<DeleteFileHolder> files, PartitionSpec spec) { + return writeManifests(files, group -> writeDeleteFileGroup(group, spec)); + } + + private List<ManifestFile> writeDeleteFileGroup( + List<DeleteFileHolder> files, PartitionSpec spec) { + RollingManifestWriter<DeleteFile> writer = newRollingDeleteManifestWriter(spec); + + try (RollingManifestWriter<DeleteFile> closableWriter = writer) { + for (DeleteFileHolder file : files) { + if (file.dataSequenceNumber() != null) { + closableWriter.add(file.deleteFile(), file.dataSequenceNumber()); + } else { + closableWriter.add(file.deleteFile()); + } + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to write delete manifests"); + } + + return writer.toManifestFiles(); + } + + private static <F> List<ManifestFile> writeManifests( + List<F> files, Function<List<F>, List<ManifestFile>> writeFunc) { + int 
groupSize = manifestFileGroupSize(ThreadPools.WORKER_THREAD_POOL_SIZE, files.size()); + List<List<F>> groups = Lists.partition(files, groupSize); + Queue<ManifestFile> manifests = Queues.newConcurrentLinkedQueue(); + Tasks.foreach(groups) + .stopOnFailure() + .throwFailureWhenFinished() Review Comment: This is essentially the same behavior as before: we never cleaned up files that the rolling writer may have already produced. We can try to cover that in the future, but I am not sure it would be worth the extra complexity. Such files will be cleaned up by table maintenance anyway. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org