Re: [PR] Core: Use time-travel schema when resolving partition spec in scan [iceberg]

via GitHub Sat, 18 Oct 2025 06:03:20 -0700


chenjian2664 commented on code in PR #13301:
URL: https://github.com/apache/iceberg/pull/13301#discussion_r2442415133



##########
core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java:
##########
@@ -111,6 +118,133 @@ public void testPartitionSourceRename() throws IOException {
     tasks = Lists.newArrayList(table.newScan().filter(Expressions.equal("p", "one")).planFiles());
 
     assertThat(tasks).hasSize(1);
+
+    // create a new commit
+    table.newAppend().appendFile(createDataFile("three")).commit();
+
+    // use filter with previous partition name
+    tasks =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)
+                .filter(Expressions.equal("part", "one"))
+                .planFiles());
+
+    assertThat(tasks).hasSize(1);
+  }
+
+  @TestTemplate
+  public void testPartitionSourceDrop() throws IOException {
+    Table table = TestTables.create(temp, "test", SCHEMA, SPEC, formatVersion);
+
+    DataFile fileOne = createDataFile("one");
+    DataFile fileTwo = createDataFile("two");
+
+    table.newAppend().appendFile(fileOne).appendFile(fileTwo).commit();
+    long firstSnapshotId = table.currentSnapshot().snapshotId();
+
+    table.updateSpec().addField("id").commit();
+
+    List<FileScanTask> tasks =
+        Lists.newArrayList(
+            table.newScan().filter(Expressions.not(Expressions.isNull("id"))).planFiles());
+
+    assertThat(tasks).hasSize(2);
+
+    DataFile fileThree = createDataFile("three", table.schema(), table.spec());
+    table.newAppend().appendFile(fileThree).commit();
+
+    // remove one field from spec and drop the column
+    table.updateSpec().removeField("id").commit();
+    table.updateSchema().deleteColumn("id").commit();
+
+    List<FileScanTask> tasksAtFirstSnapshotId =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)
+                .filter(Expressions.not(Expressions.isNull("id")))
+                .planFiles());
+
+    assertThat(
+            tasksAtFirstSnapshotId.stream()
+                .map(ContentScanTask::file)
+                .map(ContentFile::location)
+                .collect(Collectors.toList()))
+        .isEqualTo(
+            tasks.stream()
+                .map(ContentScanTask::file)
+                .map(ContentFile::location)
+                .collect(Collectors.toList()));
+  }
+
+  @TestTemplate
+  public void testColumnRename() throws IOException {
+    Table table = TestTables.create(temp, "test", SCHEMA, SPEC, formatVersion);
+
+    DataFile fileOne = createDataFile("one");
+    DataFile fileTwo = createDataFile("two");
+
+    table.newAppend().appendFile(fileOne).appendFile(fileTwo).commit();
+    long firstSnapshotId = table.currentSnapshot().snapshotId();
+
+    table.updateSchema().renameColumn("data", "renamed_data").commit();
+
+    DataFile fileThree = createDataFile("three", table.schema(), table.spec());
+    table.newAppend().appendFile(fileThree).commit();
+    long secondSnapshotId = table.currentSnapshot().snapshotId();
+
+    // generate a new commit
+    DataFile fileFour = createDataFile("four", table.schema(), table.spec());
+    table.newAppend().appendFile(fileFour).commit();
+
+    // running successfully with the new filter on previous column name
+    List<FileScanTask> tasks =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)

Review Comment:
   Added a case that does not specify a snapshot to all the tests:
   the rename tests filter on the renamed column, and the dropped-column tests
   filter on `part` (one of the remaining columns) instead.



##########
core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java:
##########
@@ -111,6 +118,133 @@ public void testPartitionSourceRename() throws IOException {
     tasks = Lists.newArrayList(table.newScan().filter(Expressions.equal("p", "one")).planFiles());
 
     assertThat(tasks).hasSize(1);
+
+    // create a new commit
+    table.newAppend().appendFile(createDataFile("three")).commit();
+
+    // use filter with previous partition name
+    tasks =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)
+                .filter(Expressions.equal("part", "one"))
+                .planFiles());
+
+    assertThat(tasks).hasSize(1);
+  }
+
+  @TestTemplate
+  public void testPartitionSourceDrop() throws IOException {
+    Table table = TestTables.create(temp, "test", SCHEMA, SPEC, formatVersion);
+
+    DataFile fileOne = createDataFile("one");
+    DataFile fileTwo = createDataFile("two");
+
+    table.newAppend().appendFile(fileOne).appendFile(fileTwo).commit();
+    long firstSnapshotId = table.currentSnapshot().snapshotId();
+
+    table.updateSpec().addField("id").commit();
+
+    List<FileScanTask> tasks =
+        Lists.newArrayList(
+            table.newScan().filter(Expressions.not(Expressions.isNull("id"))).planFiles());
+
+    assertThat(tasks).hasSize(2);
+
+    DataFile fileThree = createDataFile("three", table.schema(), table.spec());
+    table.newAppend().appendFile(fileThree).commit();
+
+    // remove one field from spec and drop the column
+    table.updateSpec().removeField("id").commit();
+    table.updateSchema().deleteColumn("id").commit();
+
+    List<FileScanTask> tasksAtFirstSnapshotId =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)
+                .filter(Expressions.not(Expressions.isNull("id")))
+                .planFiles());
+
+    assertThat(
+            tasksAtFirstSnapshotId.stream()
+                .map(ContentScanTask::file)
+                .map(ContentFile::location)
+                .collect(Collectors.toList()))
+        .isEqualTo(
+            tasks.stream()
+                .map(ContentScanTask::file)
+                .map(ContentFile::location)
+                .collect(Collectors.toList()));
+  }
+
+  @TestTemplate
+  public void testColumnRename() throws IOException {
+    Table table = TestTables.create(temp, "test", SCHEMA, SPEC, formatVersion);
+
+    DataFile fileOne = createDataFile("one");
+    DataFile fileTwo = createDataFile("two");
+
+    table.newAppend().appendFile(fileOne).appendFile(fileTwo).commit();
+    long firstSnapshotId = table.currentSnapshot().snapshotId();
+
+    table.updateSchema().renameColumn("data", "renamed_data").commit();
+
+    DataFile fileThree = createDataFile("three", table.schema(), table.spec());
+    table.newAppend().appendFile(fileThree).commit();
+    long secondSnapshotId = table.currentSnapshot().snapshotId();
+
+    // generate a new commit
+    DataFile fileFour = createDataFile("four", table.schema(), table.spec());
+    table.newAppend().appendFile(fileFour).commit();
+
+    // running successfully with the new filter on previous column name
+    List<FileScanTask> tasks =
+        Lists.newArrayList(
+            table
+                .newScan()
+                .useSnapshot(firstSnapshotId)

Review Comment:
   Added a case that does not specify a snapshot to all the tests:
   the rename tests filter on the renamed column, and the dropped-column tests
   filter on `part` (one of the remaining columns) instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Core: Use time-travel schema when resolving partition spec in scan [iceberg]

Reply via email to