This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 641ec601d5 [feature] [null support # 3] Add default implementation for null support in transform function (#10386) 641ec601d5 is described below commit 641ec601d577d09eeb339487cd4dd7dc0441cc99 Author: Yao Liu <y...@startree.ai> AuthorDate: Thu Mar 30 21:27:04 2023 -0700 [feature] [null support # 3] Add default implementation for null support in transform function (#10386) --- .../function/AdditionTransformFunction.java | 1 + .../transform/function/BaseTransformFunction.java | 725 ++++++++++++++++++++- .../function/IdentifierTransformFunction.java | 6 + .../function/LiteralTransformFunction.java | 13 + .../transform/function/TransformFunction.java | 122 ++++ .../function/AdditionTransformFunctionTest.java | 37 ++ .../function/BaseTransformFunctionTest.java | 48 +- .../function/IdentifierTransformFunctionTest.java | 110 ++++ .../function/LiteralTransformFunctionTest.java | 43 ++ 9 files changed, 1103 insertions(+), 2 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunction.java index 8780939846..fe5cbdfda3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunction.java @@ -45,6 +45,7 @@ public class AdditionTransformFunction extends BaseTransformFunction { @Override public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) { + super.init(arguments, columnContextMap); // Check that there are more than 1 arguments if (arguments.size() < 2) { throw new IllegalArgumentException("At least 2 arguments are required for ADD transform function"); diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java index 2d18d6bf62..5d641a2658 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java @@ -19,12 +19,19 @@ package org.apache.pinot.core.operator.transform.function; import java.math.BigDecimal; +import java.util.List; +import java.util.Map; import javax.annotation.Nullable; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.core.operator.ColumnContext; import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.ArrayCopyUtils; +import org.roaringbitmap.RoaringBitmap; /** @@ -92,7 +99,14 @@ public abstract class BaseTransformFunction implements TransformFunction { protected String[][] _stringValuesMV; protected byte[][][] _bytesValuesMV; - @Nullable + protected List<TransformFunction> _arguments; + + // NOTE: this init has to be called for default getNullBitmap() implementation to be effective. 
+ @Override + public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) { + _arguments = arguments; + } + @Override public Dictionary getDictionary() { return null; @@ -141,6 +155,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[] stringValues = transformToStringValuesSV(valueBlock); ArrayCopyUtils.copy(stringValues, _intValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _intValuesSV[i] = (int) DataSchema.ColumnDataType.INT.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as INT", resultDataType)); } @@ -148,6 +168,57 @@ public abstract class BaseTransformFunction implements TransformFunction { return _intValuesSV; } + public Pair<int[], RoaringBitmap> transformToIntValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_intValuesSV == null) { + _intValuesSV = new int[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + _intValuesSV = transformToIntValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case LONG: + Pair<long[], RoaringBitmap> longResult = transformToLongValuesSVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _intValuesSV, length); + break; + case FLOAT: + Pair<float[], RoaringBitmap> floatResult = transformToFloatValuesSVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _intValuesSV, length); + break; + case DOUBLE: + Pair<double[], RoaringBitmap> doubleResult = transformToDoubleValuesSVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _intValuesSV, length); + break; + case BIG_DECIMAL: + 
Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _intValuesSV, length); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _intValuesSV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _intValuesSV[i] = (int) DataSchema.ColumnDataType.INT.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as INT", resultDataType)); + } + return ImmutablePair.of(_intValuesSV, bitmap); + } + @Override public long[] transformToLongValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -181,6 +252,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[] stringValues = transformToStringValuesSV(valueBlock); ArrayCopyUtils.copy(stringValues, _longValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _longValuesSV[i] = (long) DataSchema.ColumnDataType.LONG.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as LONG", resultDataType)); } @@ -188,6 +265,58 @@ public abstract class BaseTransformFunction implements TransformFunction { return _longValuesSV; } + @Override + public Pair<long[], RoaringBitmap> transformToLongValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_longValuesSV == null) { + _longValuesSV = new long[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[], RoaringBitmap> intResults = transformToIntValuesSVWithNull(valueBlock); + bitmap = intResults.getRight(); + ArrayCopyUtils.copy(intResults.getLeft(), _longValuesSV, length); + break; + case LONG: + _longValuesSV = transformToLongValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case FLOAT: + Pair<float[], RoaringBitmap> floatResult = transformToFloatValuesSVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _longValuesSV, length); + break; + case DOUBLE: + Pair<double[], RoaringBitmap> doubleResult = transformToDoubleValuesSVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _longValuesSV, length); + break; + case BIG_DECIMAL: + Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _longValuesSV, length); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _longValuesSV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + 
bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _longValuesSV[i] = (long) DataSchema.ColumnDataType.LONG.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as LONG", resultDataType)); + } + return ImmutablePair.of(_longValuesSV, bitmap); + } + @Override public float[] transformToFloatValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -221,6 +350,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[] stringValues = transformToStringValuesSV(valueBlock); ArrayCopyUtils.copy(stringValues, _floatValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _floatValuesSV[i] = (float) DataSchema.ColumnDataType.FLOAT.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as FLOAT", resultDataType)); } @@ -228,6 +363,58 @@ public abstract class BaseTransformFunction implements TransformFunction { return _floatValuesSV; } + @Override + public Pair<float[], RoaringBitmap> transformToFloatValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_floatValuesSV == null) { + _floatValuesSV = new float[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[], RoaringBitmap> intResult = transformToIntValuesSVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _floatValuesSV, length); + break; + case LONG: + Pair<long[], RoaringBitmap> longResult = transformToLongValuesSVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _floatValuesSV, length); + break; + case FLOAT: + _floatValuesSV = 
transformToFloatValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case DOUBLE: + Pair<double[], RoaringBitmap> doubleResult = transformToDoubleValuesSVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _floatValuesSV, length); + break; + case BIG_DECIMAL: + Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _floatValuesSV, length); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _floatValuesSV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _floatValuesSV[i] = (float) DataSchema.ColumnDataType.FLOAT.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as FLOAT", resultDataType)); + } + return ImmutablePair.of(_floatValuesSV, bitmap); + } + @Override public double[] transformToDoubleValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -261,6 +448,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[] stringValues = transformToStringValuesSV(valueBlock); ArrayCopyUtils.copy(stringValues, _doubleValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _doubleValuesSV[i] = (double) DataSchema.ColumnDataType.DOUBLE.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as DOUBLE", resultDataType)); } @@ -268,6 +461,58 @@ public abstract class BaseTransformFunction implements TransformFunction { return _doubleValuesSV; } + @Override + public Pair<double[], RoaringBitmap> transformToDoubleValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_doubleValuesSV == null) { + _doubleValuesSV = new double[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[], RoaringBitmap> intResult = transformToIntValuesSVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _doubleValuesSV, length); + break; + case LONG: + Pair<long[], RoaringBitmap> longResult = transformToLongValuesSVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _doubleValuesSV, length); + break; + case FLOAT: + Pair<float[], RoaringBitmap> floatResult = transformToFloatValuesSVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _doubleValuesSV, length); + break; + case DOUBLE: + _doubleValuesSV = transformToDoubleValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case BIG_DECIMAL: + Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _doubleValuesSV, length); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _doubleValuesSV, length); + break; + case UNKNOWN: + bitmap = new 
RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _doubleValuesSV[i] = (double) DataSchema.ColumnDataType.DOUBLE.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as DOUBLE", resultDataType)); + } + return ImmutablePair.of(_doubleValuesSV, bitmap); + } + @Override public BigDecimal[] transformToBigDecimalValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -305,6 +550,12 @@ public abstract class BaseTransformFunction implements TransformFunction { byte[][] bytesValues = transformToBytesValuesSV(valueBlock); ArrayCopyUtils.copy(bytesValues, _bigDecimalValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _bigDecimalValuesSV[i] = (BigDecimal) DataSchema.ColumnDataType.BIG_DECIMAL.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as BIG_DECIMAL", resultDataType)); } @@ -312,6 +563,62 @@ public abstract class BaseTransformFunction implements TransformFunction { return _bigDecimalValuesSV; } + public Pair<BigDecimal[], RoaringBitmap> transformToBigDecimalValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_bigDecimalValuesSV == null) { + _bigDecimalValuesSV = new BigDecimal[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[], RoaringBitmap> intResult = transformToIntValuesSVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _bigDecimalValuesSV, length); + break; + case LONG: + Pair<long[], RoaringBitmap> longResult = transformToLongValuesSVWithNull(valueBlock); + bitmap = longResult.getRight(); + 
ArrayCopyUtils.copy(longResult.getLeft(), _bigDecimalValuesSV, length); + break; + case FLOAT: + Pair<float[], RoaringBitmap> floatResult = transformToFloatValuesSVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _bigDecimalValuesSV, length); + break; + case DOUBLE: + Pair<double[], RoaringBitmap> doubleResult = transformToDoubleValuesSVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _bigDecimalValuesSV, length); + break; + case BIG_DECIMAL: + _bigDecimalValuesSV = transformToBigDecimalValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _bigDecimalValuesSV, length); + break; + case BYTES: + Pair<byte[][], RoaringBitmap> byteResult = transformToBytesValuesSVWithNull(valueBlock); + bitmap = byteResult.getRight(); + ArrayCopyUtils.copy(byteResult.getLeft(), _bigDecimalValuesSV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _bigDecimalValuesSV[i] = (BigDecimal) DataSchema.ColumnDataType.BIG_DECIMAL.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as BIG_DECIMAL", resultDataType)); + } + return ImmutablePair.of(_bigDecimalValuesSV, bitmap); + } + @Override public String[] transformToStringValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -349,6 +656,12 @@ public abstract class BaseTransformFunction implements TransformFunction { byte[][] bytesValues = transformToBytesValuesSV(valueBlock); ArrayCopyUtils.copy(bytesValues, _stringValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _stringValuesSV[i] = (String) DataSchema.ColumnDataType.STRING.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as STRING", resultDataType)); } @@ -356,6 +669,62 @@ public abstract class BaseTransformFunction implements TransformFunction { return _stringValuesSV; } + public Pair<String[], RoaringBitmap> transformToStringValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_stringValuesSV == null) { + _stringValuesSV = new String[length]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[], RoaringBitmap> intResult = transformToIntValuesSVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _stringValuesSV, length); + break; + case LONG: + Pair<long[], RoaringBitmap> longResult = transformToLongValuesSVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _stringValuesSV, length); + break; + case FLOAT: + Pair<float[], RoaringBitmap> floatResult = transformToFloatValuesSVWithNull(valueBlock); + 
bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _stringValuesSV, length); + break; + case DOUBLE: + Pair<double[], RoaringBitmap> doubleResult = transformToDoubleValuesSVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _stringValuesSV, length); + break; + case BIG_DECIMAL: + Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _stringValuesSV, length); + break; + case STRING: + _stringValuesSV = transformToStringValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case BYTES: + Pair<byte[][], RoaringBitmap> byteResult = transformToBytesValuesSVWithNull(valueBlock); + bitmap = byteResult.getRight(); + ArrayCopyUtils.copy(byteResult.getLeft(), _stringValuesSV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _stringValuesSV[i] = (String) DataSchema.ColumnDataType.STRING.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as STRING", resultDataType)); + } + return ImmutablePair.of(_stringValuesSV, bitmap); + } + @Override public byte[][] transformToBytesValuesSV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -377,6 +746,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[] stringValues = transformToStringValuesSV(valueBlock); ArrayCopyUtils.copy(stringValues, _bytesValuesSV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _bytesValuesSV[i] = (byte[]) DataSchema.ColumnDataType.BYTES.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read SV %s as BYTES", resultDataType)); } @@ -384,6 +759,43 @@ public abstract class BaseTransformFunction implements TransformFunction { return _bytesValuesSV; } + @Override + public Pair<byte[][], RoaringBitmap> transformToBytesValuesSVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_bytesValuesSV == null) { + _bytesValuesSV = new byte[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case BIG_DECIMAL: + Pair<BigDecimal[], RoaringBitmap> bigDecimalResult = transformToBigDecimalValuesSVWithNull(valueBlock); + bitmap = bigDecimalResult.getRight(); + ArrayCopyUtils.copy(bigDecimalResult.getLeft(), _bytesValuesSV, length); + break; + case STRING: + Pair<String[], RoaringBitmap> stringResult = transformToStringValuesSVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _bytesValuesSV, length); + break; + case BYTES: + _bytesValuesSV = transformToBytesValuesSV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + for (int i = 0; i < length; i++) { + _bytesValuesSV[i] = (byte[]) DataSchema.ColumnDataType.BYTES.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read SV %s as BYTES", resultDataType)); + } + return ImmutablePair.of(_bytesValuesSV, bitmap); + } + @Override public int[][] transformToIntValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -419,6 +831,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[][] stringValuesMV = transformToStringValuesMV(valueBlock); ArrayCopyUtils.copy(stringValuesMV, _intValuesMV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _intValuesMV[i] = (int[]) DataSchema.ColumnDataType.INT_ARRAY.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read MV %s as INT", resultDataType)); } @@ -426,6 +844,53 @@ public abstract class BaseTransformFunction implements TransformFunction { return _intValuesMV; } + @Override + public Pair<int[][], RoaringBitmap> transformToIntValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_intValuesMV == null) { + _intValuesMV = new int[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + _intValuesMV = transformToIntValuesMV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case LONG: + Pair<long[][], RoaringBitmap> longResult = transformToLongValuesMVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _intValuesMV, length); + break; + case FLOAT: + Pair<float[][], RoaringBitmap> floatResult = transformToFloatValuesMVWithNull(valueBlock); + bitmap = floatResult.getRight(); + 
ArrayCopyUtils.copy(floatResult.getLeft(), _intValuesMV, length); + break; + case DOUBLE: + Pair<double[][], RoaringBitmap> doubleResult = transformToDoubleValuesMVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _intValuesMV, length); + break; + case STRING: + Pair<String[][], RoaringBitmap> stringResult = transformToStringValuesMVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _intValuesMV, length); + break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + for (int i = 0; i < length; i++) { + _intValuesMV[i] = (int[]) DataSchema.ColumnDataType.INT_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as INT", resultDataType)); + } + return ImmutablePair.of(_intValuesMV, bitmap); + } + @Override public long[][] transformToLongValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -461,6 +926,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[][] stringValuesMV = transformToStringValuesMV(valueBlock); ArrayCopyUtils.copy(stringValuesMV, _longValuesMV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _longValuesMV[i] = (long[]) DataSchema.ColumnDataType.LONG_ARRAY.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read MV %s as LONG", resultDataType)); } @@ -468,6 +939,49 @@ public abstract class BaseTransformFunction implements TransformFunction { return _longValuesMV; } + @Override + public Pair<long[][], RoaringBitmap> transformToLongValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_longValuesMV == null) { + _longValuesMV = new long[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[][], RoaringBitmap> intResult = transformToIntValuesMVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _longValuesMV, length); + break; + case FLOAT: + Pair<float[][], RoaringBitmap> floatResult = transformToFloatValuesMVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _longValuesMV, length); + break; + case DOUBLE: + Pair<double[][], RoaringBitmap> doubleResult = transformToDoubleValuesMVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _longValuesMV, length); + break; + case STRING: + Pair<String[][], RoaringBitmap> stringResult = transformToStringValuesMVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _longValuesMV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _longValuesMV[i] = (long[]) DataSchema.ColumnDataType.LONG_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as LONG", resultDataType)); + } + return ImmutablePair.of(_longValuesMV, bitmap); + } + @Override public float[][] transformToFloatValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -503,6 +1017,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[][] stringValuesMV = transformToStringValuesMV(valueBlock); ArrayCopyUtils.copy(stringValuesMV, _floatValuesMV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _floatValuesMV[i] = (float[]) DataSchema.ColumnDataType.FLOAT_ARRAY.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read MV %s as FLOAT", resultDataType)); } @@ -510,6 +1030,53 @@ public abstract class BaseTransformFunction implements TransformFunction { return _floatValuesMV; } + @Override + public Pair<float[][], RoaringBitmap> transformToFloatValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_floatValuesMV == null) { + _floatValuesMV = new float[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[][], RoaringBitmap> intResult = transformToIntValuesMVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _floatValuesMV, length); + break; + case LONG: + Pair<long[][], RoaringBitmap> longResult = transformToLongValuesMVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _floatValuesMV, length); + break; + case FLOAT: + _floatValuesMV = transformToFloatValuesMV(valueBlock); + bitmap = 
getNullBitmap(valueBlock); + break; + case DOUBLE: + Pair<double[][], RoaringBitmap> doubleResult = transformToDoubleValuesMVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _floatValuesMV, length); + break; + case STRING: + Pair<String[][], RoaringBitmap> stringResult = transformToStringValuesMVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _floatValuesMV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _floatValuesMV[i] = (float[]) DataSchema.ColumnDataType.FLOAT_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as FLOAT", resultDataType)); + } + return ImmutablePair.of(_floatValuesMV, bitmap); + } + @Override public double[][] transformToDoubleValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -545,6 +1112,12 @@ public abstract class BaseTransformFunction implements TransformFunction { String[][] stringValuesMV = transformToStringValuesMV(valueBlock); ArrayCopyUtils.copy(stringValuesMV, _doubleValuesMV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _doubleValuesMV[i] = (double[]) DataSchema.ColumnDataType.DOUBLE_ARRAY.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read MV %s as DOUBLE", resultDataType)); } @@ -552,6 +1125,52 @@ public abstract class BaseTransformFunction implements TransformFunction { return _doubleValuesMV; } + public Pair<double[][], RoaringBitmap> transformToDoubleValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_doubleValuesMV == null) { + _doubleValuesMV = new double[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType.getStoredType()) { + case INT: + Pair<int[][], RoaringBitmap> intResult = transformToIntValuesMVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _doubleValuesMV, length); + break; + case LONG: + Pair<long[][], RoaringBitmap> longResult = transformToLongValuesMVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _doubleValuesMV, length); + break; + case FLOAT: + Pair<float[][], RoaringBitmap> floatResult = transformToFloatValuesMVWithNull(valueBlock); + bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _doubleValuesMV, length); + break; + case DOUBLE: + _doubleValuesMV = transformToDoubleValuesMV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case STRING: + Pair<String[][], RoaringBitmap> stringResult = transformToStringValuesMVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _doubleValuesMV, length); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. 
+ for (int i = 0; i < length; i++) { + _doubleValuesMV[i] = (double[]) DataSchema.ColumnDataType.DOUBLE_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as DOUBLE", resultDataType)); + } + return ImmutablePair.of(_doubleValuesMV, bitmap); + } + @Override public String[][] transformToStringValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -587,6 +1206,12 @@ public abstract class BaseTransformFunction implements TransformFunction { double[][] doubleValuesMV = transformToDoubleValuesMV(valueBlock); ArrayCopyUtils.copy(doubleValuesMV, _stringValuesMV, length); break; + case UNKNOWN: + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _stringValuesMV[i] = (String[]) DataSchema.ColumnDataType.STRING_ARRAY.getNullPlaceholder(); + } + break; default: throw new IllegalStateException(String.format("Cannot read MV %s as STRING", resultDataType)); } @@ -594,6 +1219,52 @@ public abstract class BaseTransformFunction implements TransformFunction { return _stringValuesMV; } + public Pair<String[][], RoaringBitmap> transformToStringValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_stringValuesMV == null) { + _stringValuesMV = new String[length][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType) { + case INT: + Pair<int[][], RoaringBitmap> intResult = transformToIntValuesMVWithNull(valueBlock); + bitmap = intResult.getRight(); + ArrayCopyUtils.copy(intResult.getLeft(), _stringValuesMV, length); + break; + case LONG: + Pair<long[][], RoaringBitmap> longResult = transformToLongValuesMVWithNull(valueBlock); + bitmap = longResult.getRight(); + ArrayCopyUtils.copy(longResult.getLeft(), _stringValuesMV, length); + break; + case FLOAT: + Pair<float[][], RoaringBitmap> floatResult = transformToFloatValuesMVWithNull(valueBlock); 
+ bitmap = floatResult.getRight(); + ArrayCopyUtils.copy(floatResult.getLeft(), _stringValuesMV, length); + break; + case DOUBLE: + Pair<double[][], RoaringBitmap> doubleResult = transformToDoubleValuesMVWithNull(valueBlock); + bitmap = doubleResult.getRight(); + ArrayCopyUtils.copy(doubleResult.getLeft(), _stringValuesMV, length); + break; + case STRING: + _stringValuesMV = transformToStringValuesMV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non null-handling. + for (int i = 0; i < length; i++) { + _stringValuesMV[i] = (String[]) DataSchema.ColumnDataType.STRING_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as STRING", resultDataType)); + } + return ImmutablePair.of(_stringValuesMV, bitmap); + } + @Override public byte[][][] transformToBytesValuesMV(ValueBlock valueBlock) { int length = valueBlock.getNumDocs(); @@ -617,4 +1288,56 @@ public abstract class BaseTransformFunction implements TransformFunction { } return _bytesValuesMV; } + + @Override + public Pair<byte[][][], RoaringBitmap> transformToBytesValuesMVWithNull(ValueBlock valueBlock) { + int length = valueBlock.getNumDocs(); + if (_bytesValuesMV == null) { + _bytesValuesMV = new byte[length][][]; + } + RoaringBitmap bitmap; + DataType resultDataType = getResultMetadata().getDataType(); + switch (resultDataType) { + case STRING: + Pair<String[][], RoaringBitmap> stringResult = transformToStringValuesMVWithNull(valueBlock); + bitmap = stringResult.getRight(); + ArrayCopyUtils.copy(stringResult.getLeft(), _bytesValuesMV, length); + break; + case BYTES: + _bytesValuesMV = transformToBytesValuesMV(valueBlock); + bitmap = getNullBitmap(valueBlock); + break; + case UNKNOWN: + bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + // Copy the values to ensure behaviour consistency with non 
null-handling. + for (int i = 0; i < length; i++) { + _bytesValuesMV[i] = (byte[][]) DataSchema.ColumnDataType.BYTES_ARRAY.getNullPlaceholder(); + } + break; + default: + throw new IllegalStateException(String.format("Cannot read MV %s as bytes", resultDataType)); + } + return ImmutablePair.of(_bytesValuesMV, bitmap); + } + + @Nullable + @Override + public RoaringBitmap getNullBitmap(ValueBlock valueBlock) { + // TODO: _arguments shouldn't be null if all the transform functions call the init(). + if (_arguments == null) { + return null; + } + RoaringBitmap bitmap = new RoaringBitmap(); + for (TransformFunction arg : _arguments) { + RoaringBitmap argBitmap = arg.getNullBitmap(valueBlock); + if (argBitmap != null) { + bitmap.or(argBitmap); + } + } + if (bitmap.isEmpty()) { + return null; + } + return bitmap; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunction.java index 52bae96e9f..1d0cd84fb5 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunction.java @@ -28,6 +28,7 @@ import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; import org.apache.pinot.segment.spi.evaluator.TransformEvaluator; import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.roaringbitmap.RoaringBitmap; /** @@ -206,4 +207,9 @@ public class IdentifierTransformFunction implements TransformFunction, PushDownT String[][] buffer) { projectionBlock.fillValues(_columnName, evaluator, buffer); } + + @Override + public RoaringBitmap getNullBitmap(ValueBlock valueBlock) { + return valueBlock.getBlockValueSet(_columnName).getNullBitmap(); + } } diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunction.java index 28adbaf814..46a2639bd4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunction.java @@ -32,6 +32,7 @@ import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.BooleanUtils; import org.apache.pinot.spi.utils.BytesUtils; +import org.roaringbitmap.RoaringBitmap; /** @@ -246,4 +247,16 @@ public class LiteralTransformFunction implements TransformFunction { public byte[][][] transformToBytesValuesMV(ValueBlock valueBlock) { throw new UnsupportedOperationException(); } + + @Override + public RoaringBitmap getNullBitmap(ValueBlock valueBlock) { + // Treat all unknown type values as null regardless of the value. 
+ if (_dataType != DataType.UNKNOWN) { + return null; + } + int length = valueBlock.getNumDocs(); + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(0L, length); + return bitmap; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java index fc26a44475..7f3e59009d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/TransformFunction.java @@ -22,10 +22,13 @@ import java.math.BigDecimal; import java.util.List; import java.util.Map; import javax.annotation.Nullable; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.core.operator.ColumnContext; import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.roaringbitmap.RoaringBitmap; /** @@ -71,11 +74,25 @@ public interface TransformFunction { */ int[] transformToDictIdsSV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to single-valued dictionary ids with null bit vector. + */ + default Pair<int[], RoaringBitmap> transformToDictIdsSVWithNull(ValueBlock block) { + return ImmutablePair.of(transformToDictIdsSV(block), getNullBitmap(block)); + } + /** * Transforms the data from the given value block to multi-valued dictionary ids. */ int[][] transformToDictIdsMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued dictionary ids with null bit vector. 
+ */ + default Pair<int[][], RoaringBitmap> transformToDictIdsMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToDictIdsMV(valueBlock), getNullBitmap(valueBlock)); + } + /** * SINGLE-VALUED APIs */ @@ -85,36 +102,88 @@ public interface TransformFunction { */ int[] transformToIntValuesSV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to single-valued int values with null bit vector. + */ + default Pair<int[], RoaringBitmap> transformToIntValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToIntValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued long values. */ long[] transformToLongValuesSV(ValueBlock valueBlock); + + /** + * Transforms the data from the given value block to single-valued long values with null bit vector. + */ + default Pair<long[], RoaringBitmap> transformToLongValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToLongValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued float values. */ float[] transformToFloatValuesSV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to single-valued float values with null bit vector. + */ + default Pair<float[], RoaringBitmap> transformToFloatValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToFloatValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued double values. */ double[] transformToDoubleValuesSV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to single-valued double values with null bit vector. 
+ */ + default Pair<double[], RoaringBitmap> transformToDoubleValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToDoubleValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued BigDecimal values. */ BigDecimal[] transformToBigDecimalValuesSV(ValueBlock valueBlock); + + /** + * Transforms the data from the given value block to single-valued BigDecimal values and null bit vector. + */ + default Pair<BigDecimal[], RoaringBitmap> transformToBigDecimalValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToBigDecimalValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued string values. */ String[] transformToStringValuesSV(ValueBlock valueBlock); + + /** + * Transforms the data from the given value block to single-valued string values and null bit vector. + */ + default Pair<String[], RoaringBitmap> transformToStringValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToStringValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to single-valued bytes values. */ byte[][] transformToBytesValuesSV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to single-valued bytes values and null bit vector. + */ + default Pair<byte[][], RoaringBitmap> transformToBytesValuesSVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToBytesValuesSV(valueBlock), getNullBitmap(valueBlock)); + } + /** * MULTI-VALUED APIs */ @@ -124,28 +193,81 @@ public interface TransformFunction { */ int[][] transformToIntValuesMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued int values and null bit vector. 
+ */ + default Pair<int[][], RoaringBitmap> transformToIntValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToIntValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to multi-valued long values. */ long[][] transformToLongValuesMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued long values and null bit vector. + */ + default Pair<long[][], RoaringBitmap> transformToLongValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToLongValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to multi-valued float values. */ float[][] transformToFloatValuesMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued float values and null bit vector. + */ + default Pair<float[][], RoaringBitmap> transformToFloatValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToFloatValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to multi-valued double values. */ double[][] transformToDoubleValuesMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued double values and null bit vector. + */ + default Pair<double[][], RoaringBitmap> transformToDoubleValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToDoubleValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + /** * Transforms the data from the given value block to multi-valued string values. */ String[][] transformToStringValuesMV(ValueBlock valueBlock); + /** + * Transforms the data from the given value block to multi-valued string values and null bit vector. 
+ */ + default Pair<String[][], RoaringBitmap> transformToStringValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToStringValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + + /** * Transforms the data from the given value block to multi-valued bytes values. */ byte[][][] transformToBytesValuesMV(ValueBlock valueBlock); + + /** + * Transforms the data from the given projection block to multi-valued bytes values and null bit vector. + */ + default Pair<byte[][][], RoaringBitmap> transformToBytesValuesMVWithNull(ValueBlock valueBlock) { + return ImmutablePair.of(transformToBytesValuesMV(valueBlock), getNullBitmap(valueBlock)); + } + + /** + * Gets the null rows for transformation result. Should be called when only null information is needed for + * transformation. + * + * @return Null bit vector that indicates null rows for transformation result + * If returns null, it means no record is null. + */ + @Nullable + RoaringBitmap getNullBitmap(ValueBlock block); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunctionTest.java index 744aa1cec4..044388dac8 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/AdditionTransformFunctionTest.java @@ -22,6 +22,7 @@ import java.math.BigDecimal; import org.apache.pinot.common.request.context.ExpressionContext; import org.apache.pinot.common.request.context.RequestContextUtils; import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.roaringbitmap.RoaringBitmap; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -99,4 +100,40 @@ public class AdditionTransformFunctionTest extends BaseTransformFunctionTest { } 
}; } + + @Test + public void testAdditionNullLiteral() { + ExpressionContext expression = RequestContextUtils.getExpression(String.format("add(%s,null)", INT_SV_COLUMN)); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + Assert.assertTrue(transformFunction instanceof AdditionTransformFunction); + Assert.assertEquals(transformFunction.getName(), AdditionTransformFunction.FUNCTION_NAME); + double[] expectedValues = new double[NUM_ROWS]; + for (int i = 0; i < NUM_ROWS; i++) { + expectedValues[i] = _intSVValues[i]; + } + RoaringBitmap roaringBitmap = new RoaringBitmap(); + roaringBitmap.add(0L, NUM_ROWS); + testTransformFunctionWithNull(transformFunction, expectedValues, roaringBitmap); + } + + @Test + public void testAdditionNullColumn() { + ExpressionContext expression = + RequestContextUtils.getExpression(String.format("add(%s,%s)", INT_SV_COLUMN, INT_SV_NULL_COLUMN)); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + Assert.assertTrue(transformFunction instanceof AdditionTransformFunction); + Assert.assertEquals(transformFunction.getName(), AdditionTransformFunction.FUNCTION_NAME); + double[] expectedValues = new double[NUM_ROWS]; + RoaringBitmap roaringBitmap = new RoaringBitmap(); + for (int i = 0; i < NUM_ROWS; i++) { + if (i % 2 == 0) { + expectedValues[i] = (double) _intSVValues[i] * 2; + } else { + // + expectedValues[i] = (double) Integer.MIN_VALUE + (double) _intSVValues[i]; + roaringBitmap.add(i); + } + } + testTransformFunctionWithNull(transformFunction, expectedValues, roaringBitmap); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java index 0aa57af3c2..0f0fd9b102 100644 --- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java @@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.core.operator.DocIdSetOperator; import org.apache.pinot.core.operator.ProjectionOperator; import org.apache.pinot.core.operator.blocks.ProjectionBlock; @@ -56,6 +57,7 @@ import org.apache.pinot.spi.utils.BytesUtils; import org.apache.pinot.spi.utils.JsonUtils; import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.roaringbitmap.RoaringBitmap; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -71,6 +73,8 @@ public abstract class BaseTransformFunctionTest { protected static final int MAX_NUM_MULTI_VALUES = 5; protected static final int MAX_MULTI_VALUE = 10; protected static final String INT_SV_COLUMN = "intSV"; + // INT_SV_NULL_COLUMN's even row equals to INT_SV_COLUMN. odd row is null. 
+ protected static final String INT_SV_NULL_COLUMN = "intSVNull"; protected static final String LONG_SV_COLUMN = "longSV"; protected static final String FLOAT_SV_COLUMN = "floatSV"; protected static final String DOUBLE_SV_COLUMN = "doubleSV"; @@ -154,6 +158,11 @@ public abstract class BaseTransformFunctionTest { for (int i = 0; i < NUM_ROWS; i++) { Map<String, Object> map = new HashMap<>(); map.put(INT_SV_COLUMN, _intSVValues[i]); + if (i % 2 == 0) { + map.put(INT_SV_NULL_COLUMN, _intSVValues[i]); + } else { + map.put(INT_SV_NULL_COLUMN, null); + } map.put(LONG_SV_COLUMN, _longSVValues[i]); map.put(FLOAT_SV_COLUMN, _floatSVValues[i]); map.put(DOUBLE_SV_COLUMN, _doubleSVValues[i]); @@ -178,6 +187,7 @@ public abstract class BaseTransformFunctionTest { } Schema schema = new Schema.SchemaBuilder().addSingleValueDimension(INT_SV_COLUMN, FieldSpec.DataType.INT) + .addSingleValueDimension(INT_SV_NULL_COLUMN, FieldSpec.DataType.INT) .addSingleValueDimension(LONG_SV_COLUMN, FieldSpec.DataType.LONG) .addSingleValueDimension(FLOAT_SV_COLUMN, FieldSpec.DataType.FLOAT) .addSingleValueDimension(DOUBLE_SV_COLUMN, FieldSpec.DataType.DOUBLE) @@ -197,7 +207,8 @@ public abstract class BaseTransformFunctionTest { .addDateTime(TIMESTAMP_COLUMN, FieldSpec.DataType.TIMESTAMP, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") .addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, TIME_COLUMN), null).build(); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(TIME_COLUMN).build(); + new TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(TIME_COLUMN) + .setNullHandlingEnabled(true).build(); SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema); config.setOutDir(INDEX_DIR_PATH); @@ -294,6 +305,41 @@ public abstract class BaseTransformFunctionTest { } } + protected void testTransformFunctionWithNull(TransformFunction transformFunction, double[] expectedValues, + 
RoaringBitmap expectedNull) { + Pair<int[], RoaringBitmap> intValues = transformFunction.transformToIntValuesSVWithNull(_projectionBlock); + Pair<long[], RoaringBitmap> longValues = transformFunction.transformToLongValuesSVWithNull(_projectionBlock); + Pair<float[], RoaringBitmap> floatValues = transformFunction.transformToFloatValuesSVWithNull(_projectionBlock); + Pair<double[], RoaringBitmap> doubleValues = transformFunction.transformToDoubleValuesSVWithNull(_projectionBlock); + Pair<BigDecimal[], RoaringBitmap> bigDecimalValues = null; + try { + // 1- Some transform functions cannot work with BigDecimal (e.g. exp, ln, and sqrt). + // 2- NumberFormatException is thrown when converting double.NaN, Double.POSITIVE_INFINITY, + // or Double.NEGATIVE_INFINITY. + bigDecimalValues = transformFunction.transformToBigDecimalValuesSVWithNull(_projectionBlock); + } catch (UnsupportedOperationException | NumberFormatException ignored) { + } + Pair<String[], RoaringBitmap> stringValues = transformFunction.transformToStringValuesSVWithNull(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + assertEquals(intValues.getLeft()[i], (int) expectedValues[i]); + assertEquals(longValues.getLeft()[i], (long) expectedValues[i]); + assertEquals(floatValues.getLeft()[i], (float) expectedValues[i]); + assertEquals(doubleValues.getLeft()[i], expectedValues[i]); + if (bigDecimalValues != null) { + assertEquals(bigDecimalValues.getLeft()[i].doubleValue(), expectedValues[i]); + } + assertEquals(stringValues.getLeft()[i], Double.toString(expectedValues[i])); + } + assertEquals(intValues.getRight(), expectedNull); + assertEquals(longValues.getRight(), expectedNull); + assertEquals(floatValues.getRight(), expectedNull); + assertEquals(doubleValues.getRight(), expectedNull); + if (bigDecimalValues != null) { + assertEquals(bigDecimalValues.getRight(), expectedNull); + } + assertEquals(stringValues.getRight(), expectedNull); + } + protected void testTransformFunction(TransformFunction 
transformFunction, BigDecimal[] expectedValues) { int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock); long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunctionTest.java new file mode 100644 index 0000000000..ed2624a5b9 --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/IdentifierTransformFunctionTest.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.core.operator.transform.function; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.operator.ColumnContext; +import org.apache.pinot.core.operator.blocks.ProjectionBlock; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.datasource.DataSourceMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.roaringbitmap.RoaringBitmap; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static org.mockito.Mockito.when; + + +public class IdentifierTransformFunctionTest { + private static final int NUM_DOCS = 100; + + private static final int[] INT_VALUES; + + private static final RoaringBitmap NULL_BITMAP; + + static { + INT_VALUES = new int[100]; + NULL_BITMAP = new RoaringBitmap(); + for (int i = 0; i < NUM_DOCS; i++) { + INT_VALUES[i] = i; + if (i % 2 == 0) { + NULL_BITMAP.add(i); + } + } + } + + private static final String TEST_COLUMN_NAME = "testColumn"; + + private AutoCloseable _mocks; + + @Mock + private ColumnContext _columnContext; + + @Mock + private ProjectionBlock _projectionBlock; + + @Mock + private BlockValSet _blockValSet; + + @Mock + private DataSource _dataSource; + + @Mock + private Dictionary _dictionary; + + @Mock + private DataSourceMetadata _metadata; + + @BeforeMethod + public void setUp() { + _mocks = MockitoAnnotations.openMocks(this); + when(_projectionBlock.getNumDocs()).thenReturn(NUM_DOCS); + when(_blockValSet.getIntValuesSV()).thenReturn(INT_VALUES); + when(_blockValSet.getNullBitmap()).thenReturn(NULL_BITMAP); + when(_projectionBlock.getBlockValueSet(TEST_COLUMN_NAME)).thenReturn(_blockValSet); + 
when(_columnContext.getDataSource()).thenReturn(_dataSource); + when(_dataSource.getDictionary()).thenReturn(_dictionary); + when(_dataSource.getDataSourceMetadata()).thenReturn(_metadata); + when(_metadata.getDataType()).thenReturn(FieldSpec.DataType.INT); + when(_metadata.isSingleValue()).thenReturn(true); + } + + @AfterMethod + public void tearDown() + throws Exception { + _mocks.close(); + } + + @Test + public void testNullBitmap() { + IdentifierTransformFunction identifierTransformFunction = + new IdentifierTransformFunction(TEST_COLUMN_NAME, _columnContext); + RoaringBitmap bitmap = identifierTransformFunction.getNullBitmap(_projectionBlock); + Assert.assertEquals(bitmap, NULL_BITMAP); + Pair<int[], RoaringBitmap> intResult = identifierTransformFunction.transformToIntValuesSVWithNull(_projectionBlock); + Assert.assertEquals(intResult.getLeft(), INT_VALUES); + Assert.assertEquals(intResult.getRight(), NULL_BITMAP); + } +} diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunctionTest.java index add8b83dca..abb84b0adc 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunctionTest.java @@ -18,13 +18,39 @@ */ package org.apache.pinot.core.operator.transform.function; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.common.request.context.LiteralContext; +import org.apache.pinot.core.operator.blocks.ProjectionBlock; import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.roaringbitmap.RoaringBitmap; import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; import 
org.testng.annotations.Test; +import static org.mockito.Mockito.when; + public class LiteralTransformFunctionTest { + private static final int NUM_DOCS = 100; + private AutoCloseable _mocks; + + @Mock + private ProjectionBlock _projectionBlock; + + @BeforeMethod + public void setUp() { + _mocks = MockitoAnnotations.openMocks(this); + when(_projectionBlock.getNumDocs()).thenReturn(NUM_DOCS); + } + + @AfterMethod + public void tearDown() + throws Exception { + _mocks.close(); + } @Test public void testLiteralTransformFunction() { @@ -37,4 +63,21 @@ public class LiteralTransformFunctionTest { LiteralTransformFunction nullLiteral = new LiteralTransformFunction(new LiteralContext(DataType.UNKNOWN, true)); Assert.assertEquals(nullLiteral.getStringLiteral(), "null"); } + + @Test + public void testNullTransform() { + LiteralTransformFunction nullLiteral = new LiteralTransformFunction(new LiteralContext(DataType.UNKNOWN, true)); + Assert.assertEquals(nullLiteral.getStringLiteral(), "null"); + RoaringBitmap bitmap = nullLiteral.getNullBitmap(_projectionBlock); + RoaringBitmap expectedBitmap = new RoaringBitmap(); + expectedBitmap.add(0L, NUM_DOCS); + Assert.assertEquals(bitmap, expectedBitmap); + Pair<int[], RoaringBitmap> intResult = nullLiteral.transformToIntValuesSVWithNull(_projectionBlock); + int[] intValues = intResult.getLeft(); + Assert.assertEquals(intValues.length, NUM_DOCS); + for (int i = 0; i < NUM_DOCS; i++) { + Assert.assertEquals(intValues[i], 0); + } + Assert.assertEquals(intResult.getRight(), expectedBitmap); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org