aokolnychyi commented on code in PR #11495: URL: https://github.com/apache/iceberg/pull/11495#discussion_r1835614371
########## core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java: ########## @@ -769,6 +794,82 @@ protected void validateDataFilesExist( } } + // validates there are no concurrently added DVs for referenced data files + protected void validateAddedDVs( + TableMetadata base, + Long startingSnapshotId, + Expression conflictDetectionFilter, + Snapshot parent) { + // skip if there is no current table state or table format doesn't support DVs + if (parent == null || base.formatVersion() < 3) { + return; + } + + // skip if this operation doesn't add new DVs + Set<String> dvRefs = dvRefs(); + if (dvRefs.isEmpty()) { + return; + } + + Pair<List<ManifestFile>, Set<Long>> history = + validationHistory( + base, + startingSnapshotId, + VALIDATE_ADDED_DVS_OPERATIONS, + ManifestContent.DELETES, + parent); + List<ManifestFile> newDeleteManifests = history.first(); + Set<Long> newSnapshotIds = history.second(); + + Tasks.foreach(newDeleteManifests) + .stopOnFailure() + .throwFailureWhenFinished() + .executeWith(workerPool()) + .run(m -> validateAddedDVs(m, conflictDetectionFilter, newSnapshotIds, dvRefs)); + } + + private void validateAddedDVs( + ManifestFile manifest, + Expression conflictDetectionFilter, + Set<Long> newSnapshotIds, + Set<String> dvRefs) { + try (CloseableIterable<ManifestEntry<DeleteFile>> entries = + ManifestFiles.readDeleteManifest(manifest, ops.io(), ops.current().specsById()) + .filterRows(conflictDetectionFilter) + .caseSensitive(caseSensitive) + .liveEntries()) { + + for (ManifestEntry<DeleteFile> entry : entries) { + DeleteFile file = entry.file(); + if (newSnapshotIds.contains(entry.snapshotId()) && ContentFileUtil.isDV(file)) { + ValidationException.check( + !dvRefs.contains(file.referencedDataFile()), + "Found concurrently added DV for %s: %s", + file.referencedDataFile(), + ContentFileUtil.dvDesc(file)); + } + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + // builds a set of data file locations referenced 
by new DVs + private Set<String> dvRefs() { Review Comment: We need to have `dv` in the name. Given that, I'd prefer a shorter variable name to avoid splitting statements across multiple lines. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org