nastra commented on code in PR #13537: URL: https://github.com/apache/iceberg/pull/13537#discussion_r2207157157
########## core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java: ########## @@ -113,4 +116,80 @@ public void testPartitionSourceRename() throws IOException { assertThat(tasks).hasSize(1); } + + @TestTemplate + public void testAddColumnWithDefaultValueAndQuery() throws IOException { + assumeThat(V3_AND_ABOVE.contains(formatVersion)) + .withFailMessage( + "Only enable the test for versions above V3 since default values require V3+.") + .isTrue(); + File location = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(location.delete()).isTrue(); // should be created by table create + + Table table = TestTables.create(location, "test", SCHEMA, SPEC, formatVersion); + + // Write initial data + DataFile fileOne = createDataFile("one"); + DataFile fileTwo = createDataFile("two"); + table.newAppend().appendFile(fileOne).appendFile(fileTwo).commit(); + + // Add a new column with an initial default value + String defaultValue = "default_category"; + table + .updateSchema() + .addColumn("category", Types.StringType.get(), "Product category", Literal.of(defaultValue)) + .commit(); + + // Verify the schema includes the new column with default value + Schema updatedSchema = table.schema(); + Types.NestedField categoryField = updatedSchema.findField("category"); + assertThat(categoryField).isNotNull(); + assertThat(categoryField.initialDefault()).isEqualTo(defaultValue); + assertThat(categoryField.writeDefault()).isEqualTo(defaultValue); + + // Verify scan planning works with the new column that has default value + List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles()); + assertThat(tasks).hasSize(2); + + // Test that scan with projection includes the new column with default value + Schema projectionSchema = table.schema().select("id", "data", "category"); + List<FileScanTask> projectionTasks = + Lists.newArrayList(table.newScan().project(projectionSchema).planFiles()); + assertThat(projectionTasks).hasSize(2); + + // Verify that 
each task has the correct schema with the default column + for (FileScanTask task : projectionTasks) { + assertThat(task.schema().findField("category")).isNotNull(); + assertThat(task.schema().findField("category").initialDefault()).isEqualTo(defaultValue); + } + + // Test scan with filter on the new default column + List<FileScanTask> filteredTasks = + Lists.newArrayList( + table.newScan().filter(Expressions.equal("category", defaultValue)).planFiles()); + assertThat(filteredTasks).hasSize(2); // All files should match since default applies to all + + // Test scan with filter on a value that is different than default. + List<FileScanTask> nonDefaultTasks = + Lists.newArrayList( + table.newScan().filter(Expressions.equal("category", "non_default")).planFiles()); + assertThat(nonDefaultTasks).hasSize(2); // Files are returned, filtering happens during read + + // Write new data after schema evolution + DataFile fileThree = createDataFile("three"); + table.newAppend().appendFile(fileThree).commit(); + + // Verify scan planning works with all files (old and new) + List<FileScanTask> allTasks = Lists.newArrayList(table.newScan().planFiles()); + assertThat(allTasks).hasSize(3); + + // Test that all tasks have access to the column with default value + for (FileScanTask task : allTasks) {
Review Comment: This should be simplified to:
```
assertThat(table.newScan().planFiles())
    .hasSize(3)
    .allSatisfy(
        task -> {
          Schema taskSchema = task.schema();
          Types.NestedField categoryFieldInTask = taskSchema.findField("category");
          assertThat(categoryFieldInTask).isNotNull();
          assertThat(categoryFieldInTask.initialDefault()).isEqualTo(defaultValue);
          assertThat(categoryFieldInTask.writeDefault()).isEqualTo(defaultValue);
        });
```
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org