rdblue commented on code in PR #12593: URL: https://github.com/apache/iceberg/pull/12593#discussion_r2012806564
########## core/src/main/java/org/apache/iceberg/SnapshotProducer.java: ########## @@ -283,32 +284,55 @@ public Snapshot apply() { throw new RuntimeIOException(e, "Failed to write manifest list file"); } + Map<String, String> summary = summary(); + String operation = operation(); + Long addedRows = null; - Long lastRowId = null; - if (base.rowLineageEnabled()) { - addedRows = calculateAddedRows(manifests); - lastRowId = base.nextRowId(); + Long firstRowId = null; + if (base.formatVersion() >= 3) { + addedRows = calculateAddedRows(operation, summary, manifests); + firstRowId = base.nextRowId(); } return new BaseSnapshot( sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), - operation(), - summary(base), + operation, + summaryWithTotals(base, summary), base.currentSchemaId(), manifestList.location(), - lastRowId, + firstRowId, addedRows); } - private Long calculateAddedRows(List<ManifestFile> manifests) { + private Long calculateAddedRows( + String operation, Map<String, String> summary, List<ManifestFile> manifests) { + if (summary != null) { + long addedRecords = + PropertyUtil.propertyAsLong(summary, SnapshotSummary.ADDED_RECORDS_PROP, 0L); Review Comment: We can trust the summary to be correct given that the operation produces it. However, I think it is best not to rely on this and to use actual rows assigned by the writers instead. I'll follow up with a fix. ########## core/src/main/java/org/apache/iceberg/SnapshotProducer.java: ########## @@ -283,32 +284,55 @@ public Snapshot apply() { throw new RuntimeIOException(e, "Failed to write manifest list file"); } + Map<String, String> summary = summary(); + String operation = operation(); + Long addedRows = null; - Long lastRowId = null; - if (base.rowLineageEnabled()) { - addedRows = calculateAddedRows(manifests); - lastRowId = base.nextRowId(); + Long firstRowId = null; + if (base.formatVersion() >= 3) { + addedRows = calculateAddedRows(operation, summary, manifests); + firstRowId = base.nextRowId(); } return new BaseSnapshot( sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), - operation(), - summary(base), + operation, + summaryWithTotals(base, summary), base.currentSchemaId(), manifestList.location(), - lastRowId, + firstRowId, addedRows); } - private Long calculateAddedRows(List<ManifestFile> manifests) { + private Long calculateAddedRows( + String operation, Map<String, String> summary, List<ManifestFile> manifests) { + if (summary != null) { + long addedRecords = + PropertyUtil.propertyAsLong(summary, SnapshotSummary.ADDED_RECORDS_PROP, 0L); Review Comment: We can trust the summary to be correct given that the operation produces it. However, I think it is best not to rely on this and to use actual rows assigned by the writers instead. I'll follow up with a fix. For now, I've reverted these changes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org