This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 174377df2a jsonExtractIndex support array of default values (#12748) 174377df2a is described below commit 174377df2a8fd5b3e4d28c4242b977b1694f3ef5 Author: Saurabh Dubey <saurabhd...@gmail.com> AuthorDate: Fri Mar 29 22:16:03 2024 +0530 jsonExtractIndex support array of default values (#12748) --- .../JsonExtractIndexTransformFunction.java | 77 +++++++++++++++++- .../JsonExtractIndexTransformFunctionTest.java | 90 ++++++++++++++++++---- 2 files changed, 151 insertions(+), 16 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java index 12e38ea5d6..b499b7384c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java @@ -18,6 +18,8 @@ */ package org.apache.pinot.core.operator.transform.function; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; import java.math.BigDecimal; import java.util.List; import java.util.Map; @@ -27,6 +29,7 @@ import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; import org.apache.pinot.segment.spi.index.reader.JsonIndexReader; import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.utils.JsonUtils; import org.roaringbitmap.RoaringBitmap; @@ -101,7 +104,24 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { if (!(fourthArgument instanceof LiteralTransformFunction)) { throw new IllegalArgumentException("Default value must be a literal"); } - _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral()); + + if (isSingleValue) { + _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral()); + } else { + try { + JsonNode mvArray = JsonUtils.stringToJsonNode(((LiteralTransformFunction) fourthArgument).getStringLiteral()); + if (!mvArray.isArray()) { + throw new IllegalArgumentException("Default value must be a valid JSON array"); + } + Object[] defaultValues = new Object[mvArray.size()]; + for (int i = 0; i < mvArray.size(); i++) { + defaultValues[i] = dataType.convert(mvArray.get(i).asText()); + } + _defaultValue = defaultValues; + } catch (IOException e) { + throw new IllegalArgumentException("Default value must be a valid JSON array"); + } + } } String filterJsonPath = null; @@ -267,6 +287,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { for (int i = 0; i < numDocs; i++) { String[] value = valuesFromIndex[i]; + if (value.length == 0) { + if (_defaultValue != null) { + _intValuesMV[i] = new int[((Object[]) (_defaultValue)).length]; + for (int j = 0; j < _intValuesMV[i].length; j++) { + _intValuesMV[i][j] = (int) ((Object[]) _defaultValue)[j]; + } + continue; + } + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i])); + } _intValuesMV[i] = new int[value.length]; for (int j = 0; j < value.length; j++) { _intValuesMV[i][j] = Integer.parseInt(value[j]); @@ -283,6 +314,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { _valueToMatchingDocsMap); for (int i = 0; i < numDocs; i++) { String[] value = valuesFromIndex[i]; + if (value.length == 0) { + if (_defaultValue != null) { + _longValuesMV[i] = new long[((Object[]) (_defaultValue)).length]; + for (int j = 0; j < _longValuesMV[i].length; j++) { + _longValuesMV[i][j] = (long) ((Object[]) _defaultValue)[j]; + } + continue; + } + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i])); + } _longValuesMV[i] = new long[value.length]; for (int j = 0; j < value.length; j++) { _longValuesMV[i][j] = Long.parseLong(value[j]); @@ -299,6 +341,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { _valueToMatchingDocsMap); for (int i = 0; i < numDocs; i++) { String[] value = valuesFromIndex[i]; + if (value.length == 0) { + if (_defaultValue != null) { + _floatValuesMV[i] = new float[((Object[]) (_defaultValue)).length]; + for (int j = 0; j < _floatValuesMV[i].length; j++) { + _floatValuesMV[i][j] = (float) ((Object[]) _defaultValue)[j]; + } + continue; + } + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i])); + } _floatValuesMV[i] = new float[value.length]; for (int j = 0; j < value.length; j++) { _floatValuesMV[i][j] = Float.parseFloat(value[j]); @@ -315,6 +368,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { _valueToMatchingDocsMap); for (int i = 0; i < numDocs; i++) { String[] value = valuesFromIndex[i]; + if (value.length == 0) { + if (_defaultValue != null) { + _doubleValuesMV[i] = new double[((Object[]) (_defaultValue)).length]; + for (int j = 0; j < _doubleValuesMV[i].length; j++) { + _doubleValuesMV[i][j] = (double) ((Object[]) _defaultValue)[j]; + } + continue; + } + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i])); + } _doubleValuesMV[i] = new double[value.length]; for (int j = 0; j < value.length; j++) { _doubleValuesMV[i][j] = Double.parseDouble(value[j]); @@ -331,6 +395,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction { _valueToMatchingDocsMap); for (int i = 0; i < numDocs; i++) { String[] value = valuesFromIndex[i]; + if (value.length == 0) { + if (_defaultValue != null) { + _stringValuesMV[i] = new String[((Object[]) (_defaultValue)).length]; + for (int j = 0; j < _stringValuesMV[i].length; j++) { + _stringValuesMV[i][j] = (String) ((Object[]) _defaultValue)[j]; + } + continue; + } + throw new RuntimeException( + String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i])); + } _stringValuesMV[i] = new String[value.length]; System.arraycopy(value, 0, _stringValuesMV[i], 0, value.length); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java index d2cd792107..7fcfb31b53 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java @@ -251,7 +251,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction // MV with filters testArguments.add(new Object[]{ String.format( - "jsonExtractIndex(%s,'%s','INT_ARRAY', '0', 'REGEXP_LIKE(\"$.arrayField[*].arrStringField\", ''.*y.*'')')", + "jsonExtractIndex(%s,'%s','INT_ARRAY', '[]', 'REGEXP_LIKE(\"$.arrayField[*].arrStringField\", ''.*y.*'')')", JSON_STRING_SV_COLUMN, "$.arrayField[*].arrIntField"), "$.arrayField[?(@.arrStringField =~ /.*y.*/)].arrIntField", DataType.INT, false @@ -259,7 +259,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction testArguments.add(new Object[]{ String.format( - "jsonExtractIndex(%s,'%s','STRING_ARRAY', '0', '\"$.arrayField[*].arrIntField\" > 2')", + "jsonExtractIndex(%s,'%s','STRING_ARRAY', '[]', '\"$.arrayField[*].arrIntField\" > 2')", JSON_STRING_SV_COLUMN, "$.arrayField[*].arrStringField"), "$.arrayField[?(@.arrIntField > 2)].arrStringField", DataType.STRING, false @@ -268,7 +268,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction @Test(dataProvider = "testJsonExtractIndexDefaultValue") public void testJsonExtractIndexDefaultValue(String expressionStr, String jsonPathString, DataType resultsDataType, - boolean isSingleValue) { + boolean isSingleValue, Object expectedDefaultValue) { ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); Assert.assertTrue(transformFunction instanceof JsonExtractIndexTransformFunction); @@ -281,37 +281,72 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction case INT: int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(intValues[i], 0); + Assert.assertEquals(intValues[i], expectedDefaultValue); } break; case LONG: long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(longValues[i], 0L); + Assert.assertEquals(longValues[i], expectedDefaultValue); } break; case FLOAT: float[] floatValues = transformFunction.transformToFloatValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(floatValues[i], 0f); + Assert.assertEquals(floatValues[i], expectedDefaultValue); } break; case DOUBLE: double[] doubleValues = transformFunction.transformToDoubleValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(doubleValues[i], 0d); + Assert.assertEquals(doubleValues[i], expectedDefaultValue); } break; case BIG_DECIMAL: BigDecimal[] bigDecimalValues = transformFunction.transformToBigDecimalValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(bigDecimalValues[i], BigDecimal.ZERO); + Assert.assertEquals(bigDecimalValues[i], expectedDefaultValue); } break; case STRING: String[] stringValues = transformFunction.transformToStringValuesSV(_projectionBlock); for (int i = 0; i < NUM_ROWS; i++) { - Assert.assertEquals(stringValues[i], "null"); + Assert.assertEquals(stringValues[i], expectedDefaultValue); + } + break; + default: + throw new UnsupportedOperationException("Not support data type - " + resultsDataType); + } + } else { + switch (resultsDataType) { + case INT: + int[][] intValues = transformFunction.transformToIntValuesMV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(intValues[i], expectedDefaultValue); + } + break; + case LONG: + long[][] longValues = transformFunction.transformToLongValuesMV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(longValues[i], expectedDefaultValue); + } + break; + case FLOAT: + float[][] floatValues = transformFunction.transformToFloatValuesMV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(floatValues[i], expectedDefaultValue); + } + break; + case DOUBLE: + double[][] doubleValues = transformFunction.transformToDoubleValuesMV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(doubleValues[i], expectedDefaultValue); + } + break; + case STRING: + String[][] stringValues = transformFunction.transformToStringValuesMV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(stringValues[i], expectedDefaultValue); } break; default: @@ -326,31 +361,56 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction // With default value testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','INT',0)", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.INT, true + "$.noField"), "$.noField", DataType.INT, true, 0 }); testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','LONG',0)", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.LONG, true + "$.noField"), "$.noField", DataType.LONG, true, 0L }); testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','FLOAT',0)", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.FLOAT, true + "$.noField"), "$.noField", DataType.FLOAT, true, (float) 0 }); testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','DOUBLE',0)", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.DOUBLE, true + "$.noField"), "$.noField", DataType.DOUBLE, true, (double) 0 }); testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','BIG_DECIMAL',0)", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.BIG_DECIMAL, true + "$.noField"), "$.noField", DataType.BIG_DECIMAL, true, new BigDecimal(0) }); testArguments.add(new Object[]{ String.format("jsonExtractIndex(%s,'%s','STRING','null')", JSON_STRING_SV_COLUMN, - "$.noField"), "$.noField", DataType.STRING, true + "$.noField"), "$.noField", DataType.STRING, true, "null" }); + addMvDefaultValueTests(testArguments); return testArguments.toArray(new Object[0][]); } + private void addMvDefaultValueTests(List<Object[]> testArguments) { + testArguments.add(new Object[]{ + String.format("jsonExtractIndex(%s,'%s','INT_ARRAY', '%s')", JSON_STRING_SV_COLUMN, "$.noField", + "[1, 2, 3]"), "$.noField", DataType.INT, false, new Integer[]{1, 2, 3} + }); + testArguments.add(new Object[]{ + String.format("jsonExtractIndex(%s,'%s','LONG_ARRAY', '%s')", JSON_STRING_SV_COLUMN, "$.noField", + "[1, 5, 6]"), "$.noField", DataType.LONG, false, new Long[]{1L, 5L, 6L} + }); + testArguments.add(new Object[]{ + String.format("jsonExtractIndex(%s,'%s','FLOAT_ARRAY', '%s')", JSON_STRING_SV_COLUMN, "$.noField", + "[1.2, 3.1, 1.6]"), "$.noField", DataType.FLOAT, false, new Float[]{1.2f, 3.1f, 1.6f} + }); + testArguments.add(new Object[]{ + String.format("jsonExtractIndex(%s,'%s','DOUBLE_ARRAY', '%s')", JSON_STRING_SV_COLUMN, "$.noField", + "[1.5, 3.4, 1.6]"), "$.noField", DataType.DOUBLE, false, new Double[]{1.5d, 3.4d, 1.6d} + }); + testArguments.add(new Object[]{ + String.format("jsonExtractIndex(%s,'%s','STRING_ARRAY', '%s')", JSON_STRING_SV_COLUMN, "$.noField", + "[\"randomString1\", \"randomString2\"]"), "$.noField", DataType.STRING, false, + new String[]{"randomString1", "randomString2"} + }); + } + // get value for key, excluding nested private String getValueForKey(String blob, JsonPath path) { Object out = JSON_PARSER_CONTEXT.parse(blob).read(path); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org