hemanthboyina commented on code in PR #16125:
URL: https://github.com/apache/iceberg/pull/16125#discussion_r3196541527
##########
spark/v4.1/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java:
##########
@@ -2483,4 +2493,71 @@ private void assertDataFilePartitions(
.isEqualTo(expectedPartitionIds.get(i).intValue());
}
}
+
+ @Test
+ public void testPartitionsTableDvCountColumn() {
+ TableIdentifier tableIdentifier = TableIdentifier.of("db",
"partitions_dv_count_test");
+ createTable(tableIdentifier, SCHEMA, SPEC);
+
+ Dataset<Row> df =
+ spark.createDataFrame(
+ Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2,
"b")),
+ SimpleRecord.class);
+
+ df.select("id", "data")
+ .write()
+ .format("iceberg")
+ .mode("append")
+ .save(loadLocation(tableIdentifier));
+
+ // Query the partitions table and verify dv_count column exists with value 0
+ Dataset<Row> partitionsDF =
+ spark.read().format("iceberg").load(loadLocation(tableIdentifier,
"partitions"));
+
+ // dv_count column should be present in the schema
+ assertThat(partitionsDF.schema().fieldNames())
+ .as("Partitions table should contain dv_count column")
+ .contains("dv_count");
+
+ // dv_count should be 0 for data-only files (no DVs)
+ List<Row> rows = partitionsDF.select("dv_count").collectAsList();
+ assertThat(rows).isNotEmpty();
+ for (Row row : rows) {
+ assertThat(row.getInt(0)).as("dv_count should be 0 when no DVs are
present").isEqualTo(0);
+ }
+ }
+
+ @Test
+ public void testPartitionsTableDvCountWithPositionDeleteFiles() {
Review Comment:
done, updated
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]