rdblue commented on code in PR #11904: URL: https://github.com/apache/iceberg/pull/11904#discussion_r1906132860
########## parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java: ########## @@ -92,4 +127,232 @@ protected void set(Record struct, int pos, Object value) { struct.set(pos, value); } } + + private class LogicalTypeAnnotationParquetValueReaderVisitor + implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<ParquetValueReader<?>> { + + private final ColumnDescriptor desc; + private final org.apache.iceberg.types.Type.PrimitiveType expected; + private final PrimitiveType primitive; + + LogicalTypeAnnotationParquetValueReaderVisitor( + ColumnDescriptor desc, + org.apache.iceberg.types.Type.PrimitiveType expected, + PrimitiveType primitive) { + this.desc = desc; + this.expected = expected; + this.primitive = primitive; + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { + return Optional.of(new ParquetValueReaders.StringReader(desc)); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { + return Optional.of(new ParquetValueReaders.StringReader(desc)); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { + switch (primitive.getPrimitiveTypeName()) { + case BINARY: + case FIXED_LEN_BYTE_ARRAY: + return Optional.of( + new ParquetValueReaders.BinaryAsDecimalReader(desc, decimalLogicalType.getScale())); + case INT64: + return Optional.of( + new ParquetValueReaders.LongAsDecimalReader(desc, decimalLogicalType.getScale())); + case INT32: + return Optional.of( + new ParquetValueReaders.IntegerAsDecimalReader(desc, decimalLogicalType.getScale())); + default: + throw new UnsupportedOperationException( + "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName()); + } + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { + return Optional.of(new DateReader(desc)); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { + if (timeLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { + return Optional.of(new TimeReader(desc)); + } else if (timeLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) { + return Optional.of(new TimeMillisReader(desc)); + } + + return Optional.empty(); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { + if (timestampLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { + Types.TimestampType tsMicrosType = (Types.TimestampType) expected; + return tsMicrosType.shouldAdjustToUTC() + ? Optional.of(new TimestamptzReader(desc)) + : Optional.of(new TimestampReader(desc)); + } else if (timestampLogicalType.getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) { + Types.TimestampType tsMillisType = (Types.TimestampType) expected; + return tsMillisType.shouldAdjustToUTC() + ? Optional.of(new TimestamptzMillisReader(desc)) + : Optional.of(new TimestampMillisReader(desc)); + } + + return LogicalTypeAnnotation.LogicalTypeAnnotationVisitor.super.visit(timestampLogicalType); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { + if (intLogicalType.getBitWidth() == 64) { + return Optional.of(new ParquetValueReaders.UnboxedReader<>(desc)); + } + return (expected.typeId() == org.apache.iceberg.types.Type.TypeID.LONG) + ? Optional.of(new ParquetValueReaders.IntAsLongReader(desc)) + : Optional.of(new ParquetValueReaders.UnboxedReader<>(desc)); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) { + return Optional.of(new ParquetValueReaders.StringReader(desc)); + } + + @Override + public Optional<ParquetValueReader<?>> visit( + LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) { + return Optional.of(new ParquetValueReaders.BytesReader(desc)); + } + } + + private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); + private static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); + + private static class DateReader extends ParquetValueReaders.PrimitiveReader<LocalDate> { Review Comment: I agree with moving the date/time reader classes here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org