This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new bdd834797a7 Fixing ColumnReader interface to return validity bitset in
case of multi-value primitive type (#17387)
bdd834797a7 is described below
commit bdd834797a7cef5f647b84b9fdf395f3e2678475
Author: 9aman <[email protected]>
AuthorDate: Fri Dec 19 17:34:58 2025 +0530
Fixing ColumnReader interface to return validity bitset in case of
multi-value primitive type (#17387)
* Fixing ColumnReader interface to return validity bitset in case of
multi-value primitive type
* Minor formatting improvement
* 1. Improve test cases to assert that multi-value columns have no nulls.
2. Add documentation stating the reason for passing a null validity BitSet
for multi-value primitive columns.
* Renaming validity to nulls to capture the intent of the variable better
---
.../segment/readers/DefaultValueColumnReader.java | 33 +++---
.../readers/PinotSegmentColumnReaderImpl.java | 36 +++---
.../readers/DefaultValueColumnReaderTest.java | 42 ++++---
.../readers/PinotSegmentColumnReaderImplTest.java | 54 +++++++--
.../pinot/spi/data/readers/ColumnReader.java | 61 ++++++++--
.../pinot/spi/data/readers/MultiValueResult.java | 123 +++++++++++++++++++++
6 files changed, 279 insertions(+), 70 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReader.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReader.java
index 6e97958f96d..4392fe9cd04 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReader.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReader.java
@@ -21,6 +21,7 @@ package org.apache.pinot.segment.local.segment.readers;
import javax.annotation.Nullable;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.readers.ColumnReader;
+import org.apache.pinot.spi.data.readers.MultiValueResult;
/**
@@ -238,39 +239,39 @@ public class DefaultValueColumnReader implements
ColumnReader {
}
@Override
- public int[] nextIntMV() {
+ public MultiValueResult<int[]> nextIntMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
_currentIndex++;
- return _defaultIntMV;
+ return MultiValueResult.of(_defaultIntMV, null);
}
@Override
- public long[] nextLongMV() {
+ public MultiValueResult<long[]> nextLongMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
_currentIndex++;
- return _defaultLongMV;
+ return MultiValueResult.of(_defaultLongMV, null);
}
@Override
- public float[] nextFloatMV() {
+ public MultiValueResult<float[]> nextFloatMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
_currentIndex++;
- return _defaultFloatMV;
+ return MultiValueResult.of(_defaultFloatMV, null);
}
@Override
- public double[] nextDoubleMV() {
+ public MultiValueResult<double[]> nextDoubleMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
_currentIndex++;
- return _defaultDoubleMV;
+ return MultiValueResult.of(_defaultDoubleMV, null);
}
@Override
@@ -354,27 +355,27 @@ public class DefaultValueColumnReader implements
ColumnReader {
// Multi-value accessors
@Override
- public int[] getIntMV(int docId) {
+ public MultiValueResult<int[]> getIntMV(int docId) {
validateDocId(docId);
- return _defaultIntMV;
+ return MultiValueResult.of(_defaultIntMV, null);
}
@Override
- public long[] getLongMV(int docId) {
+ public MultiValueResult<long[]> getLongMV(int docId) {
validateDocId(docId);
- return _defaultLongMV;
+ return MultiValueResult.of(_defaultLongMV, null);
}
@Override
- public float[] getFloatMV(int docId) {
+ public MultiValueResult<float[]> getFloatMV(int docId) {
validateDocId(docId);
- return _defaultFloatMV;
+ return MultiValueResult.of(_defaultFloatMV, null);
}
@Override
- public double[] getDoubleMV(int docId) {
+ public MultiValueResult<double[]> getDoubleMV(int docId) {
validateDocId(docId);
- return _defaultDoubleMV;
+ return MultiValueResult.of(_defaultDoubleMV, null);
}
@Override
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImpl.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImpl.java
index 2af71982e56..b34a0e53efd 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImpl.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImpl.java
@@ -23,6 +23,7 @@ import javax.annotation.Nullable;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.readers.ColumnReader;
+import org.apache.pinot.spi.data.readers.MultiValueResult;
/**
@@ -204,44 +205,47 @@ public class PinotSegmentColumnReaderImpl implements
ColumnReader {
return value;
}
+ // For all multi-value primitive type methods (nextIntMV, nextLongMV,
nextFloatMV, nextDoubleMV,
+ // getIntMV, getLongMV, getFloatMV, getDoubleMV), we pass null for the
validity bitset since
+ // multi-value primitive types cannot have null elements. Nulls are removed
by NullValueTransformer
@Override
- public int[] nextIntMV() {
+ public MultiValueResult<int[]> nextIntMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
int[] value = _segmentColumnReader.getIntMV(_currentIndex);
_currentIndex++;
- return value;
+ return MultiValueResult.of(value, null);
}
@Override
- public long[] nextLongMV() {
+ public MultiValueResult<long[]> nextLongMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
long[] value = _segmentColumnReader.getLongMV(_currentIndex);
_currentIndex++;
- return value;
+ return MultiValueResult.of(value, null);
}
@Override
- public float[] nextFloatMV() {
+ public MultiValueResult<float[]> nextFloatMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
float[] value = _segmentColumnReader.getFloatMV(_currentIndex);
_currentIndex++;
- return value;
+ return MultiValueResult.of(value, null);
}
@Override
- public double[] nextDoubleMV() {
+ public MultiValueResult<double[]> nextDoubleMV() {
if (!hasNext()) {
throw new IllegalStateException("No more values available");
}
double[] value = _segmentColumnReader.getDoubleMV(_currentIndex);
_currentIndex++;
- return value;
+ return MultiValueResult.of(value, null);
}
@Override
@@ -328,23 +332,23 @@ public class PinotSegmentColumnReaderImpl implements
ColumnReader {
// Multi-value accessors
@Override
- public int[] getIntMV(int docId) {
- return _segmentColumnReader.getIntMV(docId);
+ public MultiValueResult<int[]> getIntMV(int docId) {
+ return MultiValueResult.of(_segmentColumnReader.getIntMV(docId), null);
}
@Override
- public long[] getLongMV(int docId) {
- return _segmentColumnReader.getLongMV(docId);
+ public MultiValueResult<long[]> getLongMV(int docId) {
+ return MultiValueResult.of(_segmentColumnReader.getLongMV(docId), null);
}
@Override
- public float[] getFloatMV(int docId) {
- return _segmentColumnReader.getFloatMV(docId);
+ public MultiValueResult<float[]> getFloatMV(int docId) {
+ return MultiValueResult.of(_segmentColumnReader.getFloatMV(docId), null);
}
@Override
- public double[] getDoubleMV(int docId) {
- return _segmentColumnReader.getDoubleMV(docId);
+ public MultiValueResult<double[]> getDoubleMV(int docId) {
+ return MultiValueResult.of(_segmentColumnReader.getDoubleMV(docId), null);
}
@Override
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReaderTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReaderTest.java
index 482b29c5432..abe05a48af8 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReaderTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/DefaultValueColumnReaderTest.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.MetricFieldSpec;
+import org.apache.pinot.spi.data.readers.MultiValueResult;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -222,21 +223,23 @@ public class DefaultValueColumnReaderTest {
int[] expectedArray = new int[]{expectedValue};
for (int i = 0; i < NUM_DOCS; i++) {
Assert.assertTrue(reader.hasNext());
- int[] result = reader.nextIntMV();
- Assert.assertTrue(Arrays.equals(result, expectedArray));
+ MultiValueResult<int[]> mvResult = reader.nextIntMV();
+ Assert.assertFalse(mvResult.hasNulls());
+ Assert.assertTrue(Arrays.equals(mvResult.getValues(), expectedArray));
}
// Test random access
reader.rewind();
for (int i = 0; i < NUM_DOCS; i++) {
- int[] result = reader.getIntMV(i);
- Assert.assertTrue(Arrays.equals(result, expectedArray));
+ MultiValueResult<int[]> mvResult = reader.getIntMV(i);
+ Assert.assertFalse(mvResult.hasNulls());
+ Assert.assertTrue(Arrays.equals(mvResult.getValues(), expectedArray));
}
// Test that the same array instance is returned (optimization)
reader.rewind();
- int[] firstCall = reader.getIntMV(0);
- int[] secondCall = reader.getIntMV(1);
+ int[] firstCall = reader.getIntMV(0).getValues();
+ int[] secondCall = reader.getIntMV(1).getValues();
Assert.assertSame(firstCall, secondCall, "Multi-value arrays should be
reused");
reader.close();
@@ -254,14 +257,15 @@ public class DefaultValueColumnReaderTest {
long expectedValue = ((Number)
fieldSpec.getDefaultNullValue()).longValue();
long[] expectedArray = new long[]{expectedValue};
for (int i = 0; i < NUM_DOCS; i++) {
- long[] result = reader.nextLongMV();
- Assert.assertTrue(Arrays.equals(result, expectedArray));
+ MultiValueResult<long[]> mvResult = reader.nextLongMV();
+ Assert.assertFalse(mvResult.hasNulls());
+ Assert.assertTrue(Arrays.equals(mvResult.getValues(), expectedArray));
}
// Test random access and array reuse
reader.rewind();
- long[] firstCall = reader.getLongMV(0);
- long[] secondCall = reader.getLongMV(1);
+ long[] firstCall = reader.getLongMV(0).getValues();
+ long[] secondCall = reader.getLongMV(1).getValues();
Assert.assertTrue(Arrays.equals(firstCall, expectedArray));
Assert.assertSame(firstCall, secondCall, "Multi-value arrays should be
reused");
@@ -277,14 +281,15 @@ public class DefaultValueColumnReaderTest {
float expectedValue = ((Number)
fieldSpec.getDefaultNullValue()).floatValue();
float[] expectedArray = new float[]{expectedValue};
for (int i = 0; i < NUM_DOCS; i++) {
- float[] result = reader.nextFloatMV();
- Assert.assertTrue(Arrays.equals(result, expectedArray));
+ MultiValueResult<float[]> mvResult = reader.nextFloatMV();
+ Assert.assertFalse(mvResult.hasNulls());
+ Assert.assertTrue(Arrays.equals(mvResult.getValues(), expectedArray));
}
// Test random access and array reuse
reader.rewind();
- float[] firstCall = reader.getFloatMV(0);
- float[] secondCall = reader.getFloatMV(1);
+ float[] firstCall = reader.getFloatMV(0).getValues();
+ float[] secondCall = reader.getFloatMV(1).getValues();
Assert.assertSame(firstCall, secondCall, "Multi-value arrays should be
reused");
reader.close();
@@ -299,14 +304,15 @@ public class DefaultValueColumnReaderTest {
double expectedValue = ((Number)
fieldSpec.getDefaultNullValue()).doubleValue();
double[] expectedArray = new double[]{expectedValue};
for (int i = 0; i < NUM_DOCS; i++) {
- double[] result = reader.nextDoubleMV();
- Assert.assertTrue(Arrays.equals(result, expectedArray));
+ MultiValueResult<double[]> mvResult = reader.nextDoubleMV();
+ Assert.assertFalse(mvResult.hasNulls());
+ Assert.assertTrue(Arrays.equals(mvResult.getValues(), expectedArray));
}
// Test random access and array reuse
reader.rewind();
- double[] firstCall = reader.getDoubleMV(0);
- double[] secondCall = reader.getDoubleMV(1);
+ double[] firstCall = reader.getDoubleMV(0).getValues();
+ double[] secondCall = reader.getDoubleMV(1).getValues();
Assert.assertSame(firstCall, secondCall, "Multi-value arrays should be
reused");
reader.close();
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImplTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImplTest.java
index b2c7c5932ac..f3e8d0141ff 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImplTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReaderImplTest.java
@@ -28,6 +28,7 @@ import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.MultiValueResult;
import org.apache.pinot.spi.utils.ReadMode;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
@@ -359,13 +360,39 @@ public class PinotSegmentColumnReaderImplTest extends
ColumnarSegmentBuildingTes
@DataProvider(name = "multiValueAccessorProvider")
public Object[][] multiValueAccessorProvider() {
// Base column configurations
+ // For primitive MV types, we need to extract .getValues() from
MultiValueResult
+ // and verify that hasNulls() is false (nulls are removed by
NullValueTransformer for MV primitive types)
Object[][] baseConfigs = new Object[][] {
- {MV_INT_COL, (MultiValueGetter) PinotSegmentColumnReaderImpl::getIntMV,
- (MultiValueSequentialGetter)
PinotSegmentColumnReaderImpl::nextIntMV, null},
- {MV_LONG_COL, (MultiValueGetter)
PinotSegmentColumnReaderImpl::getLongMV,
- (MultiValueSequentialGetter)
PinotSegmentColumnReaderImpl::nextLongMV, null},
- {MV_FLOAT_COL, (MultiValueGetter)
PinotSegmentColumnReaderImpl::getFloatMV,
- (MultiValueSequentialGetter)
PinotSegmentColumnReaderImpl::nextFloatMV,
+ {MV_INT_COL, (MultiValueGetter) (reader, docId) -> {
+ MultiValueResult<int[]> result = reader.getIntMV(docId);
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ },
+ (MultiValueSequentialGetter) reader -> {
+ MultiValueResult<int[]> result = reader.nextIntMV();
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ }, null},
+ {MV_LONG_COL, (MultiValueGetter) (reader, docId) -> {
+ MultiValueResult<long[]> result = reader.getLongMV(docId);
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ },
+ (MultiValueSequentialGetter) reader -> {
+ MultiValueResult<long[]> result = reader.nextLongMV();
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ }, null},
+ {MV_FLOAT_COL, (MultiValueGetter) (reader, docId) -> {
+ MultiValueResult<float[]> result = reader.getFloatMV(docId);
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ },
+ (MultiValueSequentialGetter) reader -> {
+ MultiValueResult<float[]> result = reader.nextFloatMV();
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ },
(Function<Object[], Object>) expectedArray -> {
float[] expectedFloatArray = new float[expectedArray.length];
for (int i = 0; i < expectedArray.length; i++) {
@@ -373,8 +400,16 @@ public class PinotSegmentColumnReaderImplTest extends
ColumnarSegmentBuildingTes
}
return expectedFloatArray;
} },
- {MV_DOUBLE_COL, (MultiValueGetter)
PinotSegmentColumnReaderImpl::getDoubleMV,
- (MultiValueSequentialGetter)
PinotSegmentColumnReaderImpl::nextDoubleMV, null},
+ {MV_DOUBLE_COL, (MultiValueGetter) (reader, docId) -> {
+ MultiValueResult<double[]> result = reader.getDoubleMV(docId);
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ },
+ (MultiValueSequentialGetter) reader -> {
+ MultiValueResult<double[]> result = reader.nextDoubleMV();
+ Assert.assertFalse(result.hasNulls(), "Multi-value primitive
types should not have nulls");
+ return result.getValues();
+ }, null},
{MV_STRING_COL, (MultiValueGetter)
PinotSegmentColumnReaderImpl::getStringMV,
(MultiValueSequentialGetter)
PinotSegmentColumnReaderImpl::nextStringMV, null},
{MV_BYTES_COL, (MultiValueGetter)
PinotSegmentColumnReaderImpl::getBytesMV,
@@ -647,7 +682,8 @@ public class PinotSegmentColumnReaderImplTest extends
ColumnarSegmentBuildingTes
// First, read all values using random access
int[][] randomAccessValues = new int[totalDocs][];
for (int docId = 0; docId < totalDocs; docId++) {
- randomAccessValues[docId] = reader.getIntMV(docId);
+ MultiValueResult<int[]> result = reader.getIntMV(docId);
+ randomAccessValues[docId] = result.getValues();
}
// Now read using iterator pattern
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/ColumnReader.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/ColumnReader.java
index 28bc576acb4..3c05c26ebaf 100644
---
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/ColumnReader.java
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/ColumnReader.java
@@ -57,7 +57,7 @@ import javax.annotation.Nullable;
* } catch (Exception e) {
* // Handle exception / log
* continue;
-* }
+ * }
* if (value != null) {
* // Process non-null value
* processValue(value);
@@ -200,10 +200,15 @@ public interface ColumnReader extends Closeable,
Serializable {
* Otherwise, clients should use next() and cast the result.
*/
boolean isInt();
+
boolean isLong();
+
boolean isFloat();
+
boolean isDouble();
+
boolean isString();
+
boolean isBytes();
/**
@@ -212,24 +217,44 @@ public interface ColumnReader extends Closeable,
Serializable {
* @throws IOException If an I/O error occurs while reading
*/
int nextInt() throws IOException;
+
long nextLong() throws IOException;
+
float nextFloat() throws IOException;
+
double nextDouble() throws IOException;
+
String nextString() throws IOException;
+
byte[] nextBytes() throws IOException;
/**
* Get the next int[] / long[] / float[] / double[] / string[] / bytes[][]
values for multi-value columns.
* Should be called only if isNextNull() returns false.
*
+ * <p>For primitive types (int, long, float, double), returns a {@link
MultiValueResult} that includes
+ * element-level null validity tracking. Use {@link
MultiValueResult#hasNulls()} and
+ * {@link MultiValueResult#isNull(int)} to check for null elements within
the array.
+ *
* @throws IOException If an I/O error occurs while reading
*/
- int[] nextIntMV() throws IOException;
- long[] nextLongMV() throws IOException;
- float[] nextFloatMV() throws IOException;
- double[] nextDoubleMV() throws IOException;
- String[] nextStringMV() throws IOException;
- byte[][] nextBytesMV() throws IOException;
+ MultiValueResult<int[]> nextIntMV()
+ throws IOException;
+
+ MultiValueResult<long[]> nextLongMV()
+ throws IOException;
+
+ MultiValueResult<float[]> nextFloatMV()
+ throws IOException;
+
+ MultiValueResult<double[]> nextDoubleMV()
+ throws IOException;
+
+ String[] nextStringMV()
+ throws IOException;
+
+ byte[][] nextBytesMV()
+ throws IOException;
/**
* Rewind the reader to start reading from the first value again.
@@ -274,10 +299,15 @@ public interface ColumnReader extends Closeable,
Serializable {
* @throws IOException If an I/O error occurs while reading
*/
int getInt(int docId) throws IOException;
+
long getLong(int docId) throws IOException;
+
float getFloat(int docId) throws IOException;
+
double getDouble(int docId) throws IOException;
+
String getString(int docId) throws IOException;
+
byte[] getBytes(int docId) throws IOException;
/**
@@ -302,14 +332,23 @@ public interface ColumnReader extends Closeable,
Serializable {
* Should be called only if isNull(docId) returns false.
* <p>Document ID is 0-based. Valid values are 0 to {@link #getTotalDocs()}
- 1.
*
+ * <p>For primitive types (int, long, float, double), returns a {@link
MultiValueResult} that includes
+ * element-level null validity tracking. Use {@link
MultiValueResult#hasNulls()} and
+ * {@link MultiValueResult#isNull(int)} to check for null elements within
the array.
+ *
* @param docId Document ID (0-based)
* @throws IndexOutOfBoundsException If docId is out of range
* @throws IOException If an I/O error occurs while reading
*/
- int[] getIntMV(int docId) throws IOException;
- long[] getLongMV(int docId) throws IOException;
- float[] getFloatMV(int docId) throws IOException;
- double[] getDoubleMV(int docId) throws IOException;
+ MultiValueResult<int[]> getIntMV(int docId) throws IOException;
+
+ MultiValueResult<long[]> getLongMV(int docId) throws IOException;
+
+ MultiValueResult<float[]> getFloatMV(int docId) throws IOException;
+
+ MultiValueResult<double[]> getDoubleMV(int docId) throws IOException;
+
String[] getStringMV(int docId) throws IOException;
+
byte[][] getBytesMV(int docId) throws IOException;
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/MultiValueResult.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/MultiValueResult.java
new file mode 100644
index 00000000000..3517f7ba02a
--- /dev/null
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/MultiValueResult.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data.readers;
+
+import java.util.BitSet;
+import javax.annotation.Nullable;
+
+
+/**
+ * Result wrapper for multi-value column reads that tracks element-level nulls.
+ *
+ * <p>This class addresses a limitation where bulk reads from columnar formats
(like Arrow)
+ * don't check the validity bitmap for null elements. By returning both the
values array
+ * and a nulls BitSet, callers can properly handle null elements within
multi-value columns.
+ *
+ * <p><b>BitSet Semantics:</b>
+ * <ul>
+ * <li>Set bit (1) = null element</li>
+ * <li>Unset bit (0) = valid/non-null element</li>
+ * <li>Null nulls BitSet = no nulls in the range (fast path)</li>
+ * </ul>
+ *
+ * <p><b>Usage Example:</b>
+ * <pre>{@code
+ * MultiValueResult<int[]> result = columnReader.nextIntMV();
+ * int[] values = result.getValues();
+ *
+ * if (result.hasNulls()) {
+ * for (int i = 0; i < values.length; i++) {
+ * if (result.isNull(i)) {
+ * // Handle null element - values[i] contains default value (0 for int)
+ * } else {
+ * // Use values[i]
+ * }
+ * }
+ * } else {
+ * // Fast path: no nulls, use values array directly
+ * }
+ * }</pre>
+ *
+ * @param <T> The array type (int[], long[], float[], double[])
+ */
+public class MultiValueResult<T> {
+ private final T _values;
+ @Nullable
+ private final BitSet _nulls;
+
+ private MultiValueResult(T values, @Nullable BitSet nulls) {
+ _values = values;
+ _nulls = nulls;
+ }
+
+ /**
+ * Create a MultiValueResult with optional nulls information.
+ *
+ * @param values The values array (int[], long[], float[], double[])
+ * @param nulls BitSet where set bits indicate null elements,
+ * or null if no nulls exist in the range
+ * @param <T> The array type
+ * @return A new MultiValueResult instance
+ */
+ public static <T> MultiValueResult<T> of(T values, @Nullable BitSet nulls) {
+ return new MultiValueResult<>(values, nulls);
+ }
+
+ /**
+ * Check if any elements in this result are null.
+ *
+ * @return true if there are null elements, false otherwise
+ */
+ public boolean hasNulls() {
+ return _nulls != null;
+ }
+
+ /**
+ * Check if a specific element is null.
+ *
+ * @param index The index of the element to check
+ * @return true if the element is null, false if it's valid
+ */
+ public boolean isNull(int index) {
+ return _nulls != null && _nulls.get(index);
+ }
+
+ /**
+ * Get the values array.
+ *
+ * <p>Note: If {@link #hasNulls()} returns true, some elements in this array
+ * may contain default values (0 for numeric types) for null positions.
+ * Use {@link #isNull(int)} to check individual elements.
+ *
+ * @return The values array
+ */
+ public T getValues() {
+ return _values;
+ }
+
+ /**
+ * Get the nulls BitSet.
+ *
+ * @return The nulls BitSet where set bits indicate null elements, or null
if no nulls exist
+ */
+ @Nullable
+ public BitSet getNulls() {
+ return _nulls;
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]