amogh-jahagirdar commented on code in PR #12593:
URL: https://github.com/apache/iceberg/pull/12593#discussion_r2011084383


##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
       throw new RuntimeIOException(e, "Failed to write manifest list file");
     }
 
+    Map<String, String> summary = summary();
+    String operation = operation();
+
     Long addedRows = null;
-    Long lastRowId = null;
-    if (base.rowLineageEnabled()) {
-      addedRows = calculateAddedRows(manifests);
-      lastRowId = base.nextRowId();
+    Long firstRowId = null;

Review Comment:
   Hm should this still be called `lastRowId`? 



##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
       throw new RuntimeIOException(e, "Failed to write manifest list file");
     }
 
+    Map<String, String> summary = summary();
+    String operation = operation();
+
     Long addedRows = null;
-    Long lastRowId = null;
-    if (base.rowLineageEnabled()) {
-      addedRows = calculateAddedRows(manifests);
-      lastRowId = base.nextRowId();
+    Long firstRowId = null;
+    if (base.formatVersion() >= 3) {
+      addedRows = calculateAddedRows(operation, summary, manifests);
+      firstRowId = base.nextRowId();
     }
 
     return new BaseSnapshot(
         sequenceNumber,
         snapshotId(),
         parentSnapshotId,
         System.currentTimeMillis(),
-        operation(),
-        summary(base),
+        operation,
+        summaryWithTotals(base, summary),
         base.currentSchemaId(),
         manifestList.location(),
-        lastRowId,
+        firstRowId,
         addedRows);
   }
 
-  private Long calculateAddedRows(List<ManifestFile> manifests) {
+  private Long calculateAddedRows(
+      String operation, Map<String, String> summary, List<ManifestFile> 
manifests) {
+    if (summary != null) {
+      long addedRecords =
+          PropertyUtil.propertyAsLong(summary, 
SnapshotSummary.ADDED_RECORDS_PROP, 0L);
+      if (DataOperations.REPLACE.equals(operation)) {
+        long replacedRecords =
+            PropertyUtil.propertyAsLong(summary, 
SnapshotSummary.DELETED_RECORDS_PROP, 0L);
+        // added may be less than replaced when records are already deleted by 
delete files
+        Preconditions.checkArgument(
+            addedRecords <= replacedRecords,
+            "Invalid REPLACE operation: %s added records > %s replaced 
records",
+            addedRecords,
+            replacedRecords);
+        return 0L;
+      }
+
+      return addedRecords;
+    }
+
     return manifests.stream()
         .filter(
             manifest ->
                 manifest.snapshotId() == null
                     || Objects.equals(manifest.snapshotId(), this.snapshotId))
+        .filter(manifest -> manifest.content() == ManifestContent.DATA)

Review Comment:
   Good catch 



##########
core/src/test/java/org/apache/iceberg/TestTableMetadata.java:
##########
@@ -232,8 +231,6 @@ public void testJsonConversion() throws Exception {
     assertThat(metadata.statisticsFiles()).isEqualTo(statisticsFiles);
     
assertThat(metadata.partitionStatisticsFiles()).isEqualTo(partitionStatisticsFiles);
     assertThat(metadata.refs()).isEqualTo(refs);
-    
assertThat(metadata.rowLineageEnabled()).isEqualTo(expected.rowLineageEnabled());
-    assertThat(metadata.nextRowId()).isEqualTo(expected.nextRowId());

Review Comment:
   Yeah I think the `nextRowId` assertions should still be retained 



##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
       throw new RuntimeIOException(e, "Failed to write manifest list file");
     }
 
+    Map<String, String> summary = summary();
+    String operation = operation();
+
     Long addedRows = null;
-    Long lastRowId = null;
-    if (base.rowLineageEnabled()) {
-      addedRows = calculateAddedRows(manifests);
-      lastRowId = base.nextRowId();
+    Long firstRowId = null;

Review Comment:
   Ah I see, no, it really is the first row ID of the new snapshot



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to