ajantha-bhat commented on code in PR #12102: URL: https://github.com/apache/iceberg/pull/12102#discussion_r1931637285
########## parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java: ########## @@ -52,6 +59,81 @@ public static <T> ParquetValueReader<T> option( return reader; } + public static ParquetValueReader<Integer> unboxed(ColumnDescriptor desc) { + return new UnboxedReader<>(desc); + } + + public static ParquetValueReader<String> strings(ColumnDescriptor desc) { + return new StringReader(desc); + } + + public static ParquetValueReader<ByteBuffer> byteBuffers(ColumnDescriptor desc) { + return new BytesReader(desc); + } + + public static ParquetValueReader<Long> intsAsLongs(ColumnDescriptor desc) { + return new IntAsLongReader(desc); + } + + public static ParquetValueReader<Double> floatsAsDoubles(ColumnDescriptor desc) { + return new FloatAsDoubleReader(desc); + } + + public static ParquetValueReader<BigDecimal> bigDecimals(ColumnDescriptor desc) { + LogicalTypeAnnotation decimal = desc.getPrimitiveType().getLogicalTypeAnnotation(); + Preconditions.checkArgument( + decimal instanceof DecimalLogicalTypeAnnotation, + "Invalid timestamp logical type: " + decimal); + + int scale = ((DecimalLogicalTypeAnnotation) decimal).getScale(); + + switch (desc.getPrimitiveType().getPrimitiveTypeName()) { + case FIXED_LEN_BYTE_ARRAY: + case BINARY: + return new BinaryAsDecimalReader(desc, scale); + case INT64: + return new LongAsDecimalReader(desc, scale); + case INT32: + return new IntegerAsDecimalReader(desc, scale); + } + throw new IllegalArgumentException( Review Comment: nit: we can add a new line after switch block ########## parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java: ########## @@ -148,96 +167,81 @@ public ParquetValueReader<?> struct( } } - private class LogicalTypeAnnotationParquetValueReaderVisitor - implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<ParquetValueReader<?>> { + private class LogicalTypeReadBuilder + implements LogicalTypeAnnotationVisitor<ParquetValueReader<?>> { private final ColumnDescriptor desc; 
private final org.apache.iceberg.types.Type.PrimitiveType expected; - private final PrimitiveType primitive; - LogicalTypeAnnotationParquetValueReaderVisitor( - ColumnDescriptor desc, - org.apache.iceberg.types.Type.PrimitiveType expected, - PrimitiveType primitive) { + LogicalTypeReadBuilder( + ColumnDescriptor desc, org.apache.iceberg.types.Type.PrimitiveType expected) { this.desc = desc; this.expected = expected; - this.primitive = primitive; } @Override - public Optional<ParquetValueReader<?>> visit( - LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { - return Optional.of(new ParquetValueReaders.StringReader(desc)); + public Optional<ParquetValueReader<?>> visit(StringLogicalTypeAnnotation stringLogicalType) { + return Optional.of(ParquetValueReaders.strings(desc)); } @Override - public Optional<ParquetValueReader<?>> visit( - LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { - return Optional.of(new ParquetValueReaders.StringReader(desc)); + public Optional<ParquetValueReader<?>> visit(EnumLogicalTypeAnnotation enumLogicalType) { + return Optional.of(ParquetValueReaders.strings(desc)); } @Override public Optional<ParquetValueReader<?>> visit(DecimalLogicalTypeAnnotation decimalLogicalType) { - switch (primitive.getPrimitiveTypeName()) { - case BINARY: - case FIXED_LEN_BYTE_ARRAY: - return Optional.of( - new ParquetValueReaders.BinaryAsDecimalReader(desc, decimalLogicalType.getScale())); - case INT64: - return Optional.of( - new ParquetValueReaders.LongAsDecimalReader(desc, decimalLogicalType.getScale())); - case INT32: - return Optional.of( - new ParquetValueReaders.IntegerAsDecimalReader(desc, decimalLogicalType.getScale())); - default: - throw new UnsupportedOperationException( - "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName()); - } + return Optional.of(ParquetValueReaders.bigDecimals(desc)); } @Override - public Optional<ParquetValueReader<?>> visit( - 
LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { + public Optional<ParquetValueReader<?>> visit(DateLogicalTypeAnnotation dateLogicalType) { return Optional.of(dateReader(desc)); } @Override - public Optional<ParquetValueReader<?>> visit( - LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { - return Optional.of(timeReader(desc, timeLogicalType.getUnit())); + public Optional<ParquetValueReader<?>> visit(TimeLogicalTypeAnnotation timeLogicalType) { + return Optional.of(timeReader(desc)); } @Override public Optional<ParquetValueReader<?>> visit( - LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { + TimestampLogicalTypeAnnotation timestampLogicalType) { return Optional.of( - timestampReader( - desc, - timestampLogicalType.getUnit(), - ((Types.TimestampType) expected).shouldAdjustToUTC())); + timestampReader(desc, ((Types.TimestampType) expected).shouldAdjustToUTC())); } @Override - public Optional<ParquetValueReader<?>> visit( - LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { + public Optional<ParquetValueReader<?>> visit(IntLogicalTypeAnnotation intLogicalType) { if (intLogicalType.getBitWidth() == 64) { + if (intLogicalType.isSigned()) { + // this will throw an UnsupportedOperationException Review Comment: nit: Why not replace this with a Preconditions check, like the newly added code below? That way we can give a clear error message such as "64 bit int logical type must be unsigned". Right now it just says "unsupported logical type INTEGER", which might confuse the user. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org