pvary commented on code in PR #13562: URL: https://github.com/apache/iceberg/pull/13562#discussion_r2236814564
########## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java: ########## @@ -508,6 +433,179 @@ public static VectorizedReader<?> replaceWithMetadataReader( return reader; } + private final class VectorizedArrowReaderLogicalTypeAnnotationVisitor + implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Object> { + private final Field arrowField; + private final PrimitiveType primitive; + + VectorizedArrowReaderLogicalTypeAnnotationVisitor(Field arrowField, PrimitiveType primitive) { + this.arrowField = arrowField; + this.primitive = primitive; + } + + @Override + public Optional<Object> visit( + LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) { + return visitEnumJsonBsonString(); + } + + @Override + public Optional<Object> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) { + return visitEnumJsonBsonString(); + } + + @Override + public Optional<Object> visit(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation uuidLogicalType) { + // Fallback to allocation based on primitive type name + VectorizedArrowReader.this.allocateVectorBasedOnTypeName( + VectorizedArrowReader.this.columnDescriptor.getPrimitiveType(), arrowField); + return Optional.empty(); + } + + @Override + public Optional<Object> visit( + LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) { + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + switch (primitive.getPrimitiveTypeName()) { + case BINARY: + case FIXED_LEN_BYTE_ARRAY: + ((FixedSizeBinaryVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.FIXED_LENGTH_DECIMAL; + VectorizedArrowReader.this.typeWidth = primitive.getTypeLength(); + break; + case INT64: + ((BigIntVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = 
ReadType.LONG_BACKED_DECIMAL; + VectorizedArrowReader.this.typeWidth = (int) BigIntVector.TYPE_WIDTH; + break; + case INT32: + ((IntVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.INT_BACKED_DECIMAL; + VectorizedArrowReader.this.typeWidth = (int) IntVector.TYPE_WIDTH; + break; + default: + throw new UnsupportedOperationException( + "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName()); + } + return Optional.empty(); + } + + @Override + public Optional<Object> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) { + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + ((DateDayVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.INT; + VectorizedArrowReader.this.typeWidth = (int) IntVector.TYPE_WIDTH; + return Optional.empty(); + } + + @Override + public Optional<Object> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + ((TimeMicroVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.LONG; + VectorizedArrowReader.this.typeWidth = (int) TimeMicroVector.TYPE_WIDTH; + return Optional.empty(); + } + + @Override + public Optional<Object> visit( + LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { + switch (timestampLogicalType.getUnit()) { + case MILLIS: + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + ((BigIntVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.TIMESTAMP_MILLIS; + VectorizedArrowReader.this.typeWidth = (int) 
BigIntVector.TYPE_WIDTH; + break; + case MICROS: + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + if (((Types.TimestampType) VectorizedArrowReader.this.icebergField.type()) + .shouldAdjustToUTC()) { + ((TimeStampMicroTZVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + } else { + ((TimeStampMicroVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + } + VectorizedArrowReader.this.readType = ReadType.LONG; + VectorizedArrowReader.this.typeWidth = (int) BigIntVector.TYPE_WIDTH; + break; + case NANOS: + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + if (((Types.TimestampNanoType) VectorizedArrowReader.this.icebergField.type()) + .shouldAdjustToUTC()) { + ((TimeStampNanoTZVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + } else { + ((TimeStampNanoVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + } + VectorizedArrowReader.this.readType = ReadType.LONG; + VectorizedArrowReader.this.typeWidth = (int) BigIntVector.TYPE_WIDTH; + break; + } + return Optional.empty(); + } + + @Override + public Optional<Object> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) { + VectorizedArrowReader.this.vec = + arrowField.createVector(VectorizedArrowReader.this.rootAlloc); + + if (intLogicalType.getBitWidth() == 8 + || intLogicalType.getBitWidth() == 16 + || intLogicalType.getBitWidth() == 32) { + ((IntVector) VectorizedArrowReader.this.vec) + .allocateNew(VectorizedArrowReader.this.batchSize); + VectorizedArrowReader.this.readType = ReadType.INT; + VectorizedArrowReader.this.typeWidth = (int) IntVector.TYPE_WIDTH; + } + if (intLogicalType.getBitWidth() == 64) { Review Comment: Could be in the `else` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org