kevinjqliu commented on code in PR #13445:
URL: https://github.com/apache/iceberg/pull/13445#discussion_r2296801684
##########
spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java:
##########
@@ -610,16 +609,45 @@ public Stream<FieldMetrics<?>> metrics() {
}
private static class InternalRowWriter extends
ParquetValueWriters.StructWriter<InternalRow> {
- private final DataType[] types;
+ static InternalRowWriter create(StructType struct,
List<ParquetValueWriter<?>> writers) {
+ int[] fieldIndexes = writerToFieldIndex(struct, writers.size());
+ return new InternalRowWriter(struct, fieldIndexes, writers);
+ }
+
+ private final StructField[] fields;
+ private final int[] fieldIndexes;
- private InternalRowWriter(List<ParquetValueWriter<?>> writers,
List<DataType> types) {
- super(writers);
- this.types = types.toArray(new DataType[0]);
+ private InternalRowWriter(
+ StructType struct, int[] fieldIndexes, List<ParquetValueWriter<?>>
writers) {
+ super(fieldIndexes, writers);
+ this.fields = struct.fields();
+ this.fieldIndexes = fieldIndexes;
}
@Override
protected Object get(InternalRow struct, int index) {
- return struct.get(index, types[index]);
+ return struct.get(index, fields[fieldIndexes[index]].dataType());
+ }
+ }
+
+ /** Returns a mapping from writer index to field index, skipping Unknown
columns. */
+ private static int[] writerToFieldIndex(StructType struct, int numWriters) {
+ if (null == struct) {
+ return IntStream.rangeClosed(0, numWriters).toArray();
}
Review Comment:
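`IntStream.rangeClosed(0, numWriters)` is inclusive on both ends, so the
resulting array will have `numWriters + 1` elements (indexes `0` through
`numWriters`). Is that intended? If the goal is one index per writer,
`IntStream.range(0, numWriters)` would produce exactly `numWriters` elements.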
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]