nastra commented on code in PR #14081:
URL: https://github.com/apache/iceberg/pull/14081#discussion_r2364015645
##########
data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java:
##########
@@ -988,6 +1004,67 @@ public void testTransformFilter() {
.isTrue();
}
+ @TestTemplate
+ public void testVariantFieldMixedValuesNotNull() throws IOException {
+ assumeThat(format).isEqualTo(FileFormat.PARQUET);
+
+ List<GenericRecord> records = Lists.newArrayList();
+ for (int i = 0; i < 10; i++) {
+ GenericRecord record = GenericRecord.create(VARIANT_SCHEMA);
+ record.setField("id", i);
+ if (i % 2 == 0) {
+ VariantMetadata metadata = Variants.metadata("field");
+ ShreddedObject obj = Variants.object(metadata);
+ obj.put("field", Variants.of("value" + i));
+ Variant variant = Variant.of(metadata, obj);
+ record.setField("variant_field", variant);
+ }
+ records.add(record);
+ }
+
+ File parquetFile = writeParquetFile("test-variant", VARIANT_SCHEMA, records);
+ InputFile inFile = Files.localInput(parquetFile);
+ try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
+ BlockMetaData blockMetaData = reader.getRowGroups().get(0);
+ MessageType fileSchema = reader.getFileMetaData().getSchema();
+ ParquetMetricsRowGroupFilter rowGroupFilter =
+ new ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true);
+ boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData);
+ assertThat(shouldRead)
+ .as("Should read: variant notNull filters must be evaluated post scan")
+ .isTrue();
+ }
+ }
+
+ @TestTemplate
+ public void testVariantFieldAllNullsNotNull() throws IOException {
+ assumeThat(format).isEqualTo(FileFormat.PARQUET);
+
+ List<GenericRecord> records = Lists.newArrayListWithExpectedSize(10);
+ for (int i = 0; i < 10; i++) {
+ GenericRecord record = GenericRecord.create(VARIANT_SCHEMA);
+ record.setField("id", i);
+ record.setField("variant_field", null);
+ records.add(record);
+ }
+
+ File parquetFile = writeParquetFile("test-variant-nulls", VARIANT_SCHEMA, records);
+ InputFile inFile = Files.localInput(parquetFile);
+
+ try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
+ BlockMetaData blockMetaData = reader.getRowGroups().get(0);
+ MessageType fileSchema = reader.getFileMetaData().getSchema();
+
+ ParquetMetricsRowGroupFilter rowGroupFilter =
+ new ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true);
+ boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData);
+
+ assertThat(shouldRead)
Review Comment:
```suggestion
assertThat(rowGroupFilter.shouldRead(fileSchema, blockMetaData))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]