yyanyy commented on code in PR #12886: URL: https://github.com/apache/iceberg/pull/12886#discussion_r2082578755
########## spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java: ########## @@ -1268,6 +1323,23 @@ private void createCompositePartitionedTableWithNullValueInPartitionColumn(Strin unionedDF.write().insertInto(sourceTableName); } + private void createPartitionedTableWithNullValueInPartitionColumn(String format) { + String createParquet = + "CREATE TABLE %s (id Integer, name String, dept String, subdept String) USING %s " + + "PARTITIONED BY (dept) LOCATION '%s'"; + sql(createParquet, sourceTableName, format, fileTableDir.getAbsolutePath()); + + Dataset<Row> unionedDF = + unpartitionedDF() + .select("id", "name", "subdept", "dept") + .unionAll(singleNullRecordDF().select("id", "name", "subdept", "dept")) + .select("id", "name", "subdept", "dept") Review Comment: nit - this line might not be needed? ########## spark/v3.4/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java: ########## @@ -632,6 +632,61 @@ public void addFilteredPartitionsToPartitionedWithNullValueFilteringOnDept() { sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", tableName)); } + @TestTemplate + public void addAllPartitionsToPartitionedWithNullValue() { + createCompositePartitionedTableWithNullValueInPartitionColumn("parquet"); + + createIcebergTable( + "id Integer, name String, dept String, subdept String", "PARTITIONED BY (id, dept)"); + + // Add all partitions including null partitions. + List<Object[]> result = + sql( + "CALL %s.system.add_files('%s', '`parquet`.`%s`')", + catalogName, tableName, fileTableDir.getAbsolutePath()); + + assertOutput(result, 10L, 5L); + + assertEquals( + "Iceberg table contains correct data", + sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", sourceTableName), + sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", tableName)); + } + + @TestTemplate + public void addPartitionsWithNullValueShouldAddFilesToNullPartition() { + // This test is to ensure that "null" string partition is not incorrectly created. + + createPartitionedTableWithNullValueInPartitionColumn("parquet"); + + createIcebergTable( + "id Integer, name String, dept String, subdept String", "PARTITIONED BY (dept)"); + + // Add all partitions including null partitions. + List<Object[]> result = + sql( + "CALL %s.system.add_files('%s', '`parquet`.`%s`')", + catalogName, tableName, fileTableDir.getAbsolutePath()); + + assertOutput(result, 6L, 3L); + + assertEquals( + "Iceberg table contains correct data", + sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", sourceTableName), + sql("SELECT id, name, dept, subdept FROM %s ORDER BY id", tableName)); + + // Check if correct partitions are created + List<Object[]> actualRows = + sql("SELECT partition from %s.partitions ORDER BY partition", tableName); + assertEquals( + "Other partitions should match", + ImmutableList.of( + row(new Object[] {new Object[] {null}}), Review Comment: nit - as we were seeing failure when the partition column type is not a string, do we want to test the case where the table partitions with non-string column? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org