nastra commented on code in PR #10953: URL: https://github.com/apache/iceberg/pull/10953#discussion_r1726810395
########## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java: ########## @@ -463,12 +460,16 @@ public static VectorizedArrowReader positionsWithSetArrowValidityVector() { return new PositionVectorReader(true); } - private static final class NullVectorReader extends VectorizedArrowReader { - private static final NullVectorReader INSTANCE = new NullVectorReader(); + public static final class NullVectorReader extends VectorizedArrowReader { + + public NullVectorReader(Types.NestedField icebergField) { + super(icebergField); + } @Override public VectorHolder read(VectorHolder reuse, int numValsToRead) { - return VectorHolder.dummyHolder(numValsToRead); + NullVector vector = new NullVector(icebergField().name(), numValsToRead); + return new VectorHolder.NullVectorHolder(vector, icebergField(), numValsToRead); Review Comment: there are places in the codebase that relied on `ConstantVectorHolder` so I'm not sure whether we should introduce a separate `NullVectorHolder`. I tried it with the following diff, which should work too: ``` --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorHolder.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorHolder.java @@ -19,6 +19,7 @@ package org.apache.iceberg.arrow.vectorized; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; import org.apache.iceberg.MetadataColumns; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; @@ -138,16 +139,19 @@ public class VectorHolder { public static class ConstantVectorHolder<T> extends VectorHolder { private final T constantValue; private final int numRows; + private final FieldVector vector; public ConstantVectorHolder(int numRows) { this.numRows = numRows; this.constantValue = null; + this.vector = new NullVector("_dummy_", numRows); } public ConstantVectorHolder(Types.NestedField icebergField, int numRows, T constantValue) { super(icebergField); this.numRows = numRows; this.constantValue = constantValue; + this.vector = new NullVector(icebergField.name(), numRows); } @Override @@ -158,6 +162,11 @@ public class VectorHolder { public Object getConstant() { return constantValue; } + + @Override + public FieldVector vector() { + return vector; + } ``` ########## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorHolder.java: ########## @@ -131,6 +131,20 @@ public boolean isDummy() { return vector == null; } + public static class NullVectorHolder extends VectorHolder { Review Comment: see my other comment on whether we should actually introduce this class vs modify `ConstantVectorHolder`to hold a `NullVector` ########## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedReaderBuilder.java: ########## @@ -97,7 +98,7 @@ public VectorizedReader<?> message( } else if (reader != null) { reorderedFields.add(reader); } else { - reorderedFields.add(VectorizedArrowReader.nulls()); Review Comment: based on my other comments, this should most likely call `VectorizedArrowReader.nulls(field)` ########## arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java: ########## @@ -19,7 +19,7 @@ package org.apache.iceberg.arrow.vectorized; import static org.apache.iceberg.Files.localInput; -import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.*; Review Comment: we don't support * imports ########## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java: ########## @@ -451,10 +452,6 @@ public String toString() { return columnDescriptor.toString(); } - public static VectorizedArrowReader nulls() { Review Comment: I don't think we should remove this method. How about rather doing this: ``` --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java @@ -455,6 +455,10 @@ public class VectorizedArrowReader implements VectorizedReader<VectorHolder> { return NullVectorReader.INSTANCE; } + public static VectorizedArrowReader nulls(Types.NestedField icebergField) { + return new NullVectorReader(icebergField); + } + public static VectorizedArrowReader positions() { return new PositionVectorReader(false); } @@ -464,11 +468,15 @@ public class VectorizedArrowReader implements VectorizedReader<VectorHolder> { } private static final class NullVectorReader extends VectorizedArrowReader { - private static final NullVectorReader INSTANCE = new NullVectorReader(); + private static final NullVectorReader INSTANCE = new NullVectorReader(null); + + private NullVectorReader(Types.NestedField icebergField) { + super(icebergField); + } @Override public VectorHolder read(VectorHolder reuse, int numValsToRead) { - return VectorHolder.dummyHolder(numValsToRead); + return new VectorHolder.ConstantVectorHolder<>(icebergField(), numValsToRead, null); } ``` ########## arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.math.BigDecimal; +import java.util.function.Supplier; +import org.apache.arrow.vector.IntVector; +import org.apache.iceberg.types.Types; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.schema.PrimitiveType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +class GenericArrowVectorAccessorFactoryTest { Review Comment: I'm not sure it's worth adding this test, unless you want to handle all vectors (which can also be a follow-up PR) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org