rdblue commented on code in PR #12512: URL: https://github.com/apache/iceberg/pull/12512#discussion_r1996429723
########## parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantReaders.java: ########## @@ -332,6 +347,91 @@ public void setPageSource(PageReadStore pageStore) { } } + private static class ShreddedArrayReader implements VariantValueReader { + private final int valueDL; + private final VariantValueReader valueReader; + private final int repeatedDL; + private final int repeatedRL; + private final VariantValueReader elementReader; + private final TripleIterator<?> valueColumn; + private final TripleIterator<?> elementColumn; + private final List<TripleIterator<?>> children; + + private ShreddedArrayReader( + int valueDL, + VariantValueReader valueReader, + int typedDL, + int typedRL, + VariantValueReader elementReader) { + this.valueDL = valueDL; + this.valueReader = valueReader; + this.repeatedDL = typedDL + 1; + this.repeatedRL = typedRL + 1; + this.elementReader = elementReader; + this.elementColumn = this.elementReader.column(); + this.valueColumn = valueReader != null ? valueReader.column() : elementColumn; + this.children = + children(Iterables.concat(Arrays.asList(valueReader), Arrays.asList(elementReader))); + } + + @Override + public VariantValue read(VariantMetadata metadata) { + // if the current definition level is less to the definition level of the repeated + // type, i.e. typed_value is null, then it's not an array + boolean isArray = elementColumn.currentDefinitionLevel() >= repeatedDL; + VariantValue value = ParquetVariantReaders.read(metadata, valueReader, valueDL); + + if (isArray) { + Preconditions.checkArgument( + value == MISSING, "Invalid variant, non-array value: %s", value); Review Comment: This should be filled in with `Variants.ofNull()`. From the spec: "If a Variant is missing in a context where a value is required, readers must return a Variant null (`00`)." This is a context where the value is required. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org