This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 174377df2a jsonExtractIndex support array of default values (#12748)
174377df2a is described below

commit 174377df2a8fd5b3e4d28c4242b977b1694f3ef5
Author: Saurabh Dubey <saurabhd...@gmail.com>
AuthorDate: Fri Mar 29 22:16:03 2024 +0530

    jsonExtractIndex support array of default values (#12748)
---
 .../JsonExtractIndexTransformFunction.java         | 77 +++++++++++++++++-
 .../JsonExtractIndexTransformFunctionTest.java     | 90 ++++++++++++++++++----
 2 files changed, 151 insertions(+), 16 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
index 12e38ea5d6..b499b7384c 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
@@ -18,6 +18,8 @@
  */
 package org.apache.pinot.core.operator.transform.function;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
 import java.math.BigDecimal;
 import java.util.List;
 import java.util.Map;
@@ -27,6 +29,7 @@ import org.apache.pinot.core.operator.blocks.ValueBlock;
 import org.apache.pinot.core.operator.transform.TransformResultMetadata;
 import org.apache.pinot.segment.spi.index.reader.JsonIndexReader;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.JsonUtils;
 import org.roaringbitmap.RoaringBitmap;
 
 
@@ -101,7 +104,24 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
       if (!(fourthArgument instanceof LiteralTransformFunction)) {
         throw new IllegalArgumentException("Default value must be a literal");
       }
-      _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral());
+
+      if (isSingleValue) {
+        _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral());
+      } else {
+        try {
+          JsonNode mvArray = JsonUtils.stringToJsonNode(((LiteralTransformFunction) fourthArgument).getStringLiteral());
+          if (!mvArray.isArray()) {
+            throw new IllegalArgumentException("Default value must be a valid JSON array");
+          }
+          Object[] defaultValues = new Object[mvArray.size()];
+          for (int i = 0; i < mvArray.size(); i++) {
+            defaultValues[i] = dataType.convert(mvArray.get(i).asText());
+          }
+          _defaultValue = defaultValues;
+        } catch (IOException e) {
+          throw new IllegalArgumentException("Default value must be a valid JSON array");
+        }
+      }
     }
 
     String filterJsonPath = null;
@@ -267,6 +287,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
 
     for (int i = 0; i < numDocs; i++) {
       String[] value = valuesFromIndex[i];
+      if (value.length == 0) {
+        if (_defaultValue != null) {
+          _intValuesMV[i] = new int[((Object[]) (_defaultValue)).length];
+          for (int j = 0; j < _intValuesMV[i].length; j++) {
+            _intValuesMV[i][j] = (int) ((Object[]) _defaultValue)[j];
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i]));
+      }
       _intValuesMV[i] = new int[value.length];
       for (int j = 0; j < value.length; j++) {
         _intValuesMV[i][j] = Integer.parseInt(value[j]);
@@ -283,6 +314,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
         _valueToMatchingDocsMap);
     for (int i = 0; i < numDocs; i++) {
       String[] value = valuesFromIndex[i];
+      if (value.length == 0) {
+        if (_defaultValue != null) {
+          _longValuesMV[i] = new long[((Object[]) (_defaultValue)).length];
+          for (int j = 0; j < _longValuesMV[i].length; j++) {
+            _longValuesMV[i][j] = (long) ((Object[]) _defaultValue)[j];
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i]));
+      }
       _longValuesMV[i] = new long[value.length];
       for (int j = 0; j < value.length; j++) {
         _longValuesMV[i][j] = Long.parseLong(value[j]);
@@ -299,6 +341,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
         _valueToMatchingDocsMap);
     for (int i = 0; i < numDocs; i++) {
       String[] value = valuesFromIndex[i];
+      if (value.length == 0) {
+        if (_defaultValue != null) {
+          _floatValuesMV[i] = new float[((Object[]) (_defaultValue)).length];
+          for (int j = 0; j < _floatValuesMV[i].length; j++) {
+            _floatValuesMV[i][j] = (float) ((Object[]) _defaultValue)[j];
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i]));
+      }
       _floatValuesMV[i] = new float[value.length];
       for (int j = 0; j < value.length; j++) {
         _floatValuesMV[i][j] = Float.parseFloat(value[j]);
@@ -315,6 +368,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
         _valueToMatchingDocsMap);
     for (int i = 0; i < numDocs; i++) {
       String[] value = valuesFromIndex[i];
+      if (value.length == 0) {
+        if (_defaultValue != null) {
+          _doubleValuesMV[i] = new double[((Object[]) (_defaultValue)).length];
+          for (int j = 0; j < _doubleValuesMV[i].length; j++) {
+            _doubleValuesMV[i][j] = (double) ((Object[]) _defaultValue)[j];
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i]));
+      }
       _doubleValuesMV[i] = new double[value.length];
       for (int j = 0; j < value.length; j++) {
         _doubleValuesMV[i][j] = Double.parseDouble(value[j]);
@@ -331,6 +395,17 @@ public class JsonExtractIndexTransformFunction extends BaseTransformFunction {
         _valueToMatchingDocsMap);
     for (int i = 0; i < numDocs; i++) {
       String[] value = valuesFromIndex[i];
+      if (value.length == 0) {
+        if (_defaultValue != null) {
+          _stringValuesMV[i] = new String[((Object[]) (_defaultValue)).length];
+          for (int j = 0; j < _stringValuesMV[i].length; j++) {
+            _stringValuesMV[i][j] = (String) ((Object[]) _defaultValue)[j];
+          }
+          continue;
+        }
+        throw new RuntimeException(
+            String.format("Illegal Json Path: [%s], for docId [%s]", _jsonPathString, valueBlock.getDocIds()[i]));
+      }
       _stringValuesMV[i] = new String[value.length];
       System.arraycopy(value, 0, _stringValuesMV[i], 0, value.length);
     }
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
index d2cd792107..7fcfb31b53 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
@@ -251,7 +251,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction
     // MV with filters
     testArguments.add(new Object[]{
         String.format(
-            "jsonExtractIndex(%s,'%s','INT_ARRAY', '0', 
'REGEXP_LIKE(\"$.arrayField[*].arrStringField\", ''.*y.*'')')",
+            "jsonExtractIndex(%s,'%s','INT_ARRAY', '[]', 
'REGEXP_LIKE(\"$.arrayField[*].arrStringField\", ''.*y.*'')')",
             JSON_STRING_SV_COLUMN,
             "$.arrayField[*].arrIntField"), "$.arrayField[?(@.arrStringField 
=~ /.*y.*/)].arrIntField", DataType.INT,
         false
@@ -259,7 +259,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction
 
     testArguments.add(new Object[]{
         String.format(
-            "jsonExtractIndex(%s,'%s','STRING_ARRAY', '0', 
'\"$.arrayField[*].arrIntField\" > 2')",
+            "jsonExtractIndex(%s,'%s','STRING_ARRAY', '[]', 
'\"$.arrayField[*].arrIntField\" > 2')",
             JSON_STRING_SV_COLUMN,
             "$.arrayField[*].arrStringField"), "$.arrayField[?(@.arrIntField > 
2)].arrStringField", DataType.STRING,
         false
@@ -268,7 +268,7 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction
 
   @Test(dataProvider = "testJsonExtractIndexDefaultValue")
   public void testJsonExtractIndexDefaultValue(String expressionStr, String 
jsonPathString, DataType resultsDataType,
-      boolean isSingleValue) {
+      boolean isSingleValue, Object expectedDefaultValue) {
     ExpressionContext expression = 
RequestContextUtils.getExpression(expressionStr);
     TransformFunction transformFunction = 
TransformFunctionFactory.get(expression, _dataSourceMap);
     Assert.assertTrue(transformFunction instanceof 
JsonExtractIndexTransformFunction);
@@ -281,37 +281,72 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction
         case INT:
           int[] intValues = 
transformFunction.transformToIntValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(intValues[i], 0);
+            Assert.assertEquals(intValues[i], expectedDefaultValue);
           }
           break;
         case LONG:
           long[] longValues = 
transformFunction.transformToLongValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(longValues[i], 0L);
+            Assert.assertEquals(longValues[i], expectedDefaultValue);
           }
           break;
         case FLOAT:
           float[] floatValues = 
transformFunction.transformToFloatValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(floatValues[i], 0f);
+            Assert.assertEquals(floatValues[i], expectedDefaultValue);
           }
           break;
         case DOUBLE:
           double[] doubleValues = 
transformFunction.transformToDoubleValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(doubleValues[i], 0d);
+            Assert.assertEquals(doubleValues[i], expectedDefaultValue);
           }
           break;
         case BIG_DECIMAL:
           BigDecimal[] bigDecimalValues = 
transformFunction.transformToBigDecimalValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(bigDecimalValues[i], BigDecimal.ZERO);
+            Assert.assertEquals(bigDecimalValues[i], expectedDefaultValue);
           }
           break;
         case STRING:
           String[] stringValues = 
transformFunction.transformToStringValuesSV(_projectionBlock);
           for (int i = 0; i < NUM_ROWS; i++) {
-            Assert.assertEquals(stringValues[i], "null");
+            Assert.assertEquals(stringValues[i], expectedDefaultValue);
+          }
+          break;
+        default:
+          throw new UnsupportedOperationException("Not support data type - " + 
resultsDataType);
+      }
+    } else {
+      switch (resultsDataType) {
+        case INT:
+          int[][] intValues = 
transformFunction.transformToIntValuesMV(_projectionBlock);
+          for (int i = 0; i < NUM_ROWS; i++) {
+            Assert.assertEquals(intValues[i], expectedDefaultValue);
+          }
+          break;
+        case LONG:
+          long[][] longValues = 
transformFunction.transformToLongValuesMV(_projectionBlock);
+          for (int i = 0; i < NUM_ROWS; i++) {
+            Assert.assertEquals(longValues[i], expectedDefaultValue);
+          }
+          break;
+        case FLOAT:
+          float[][] floatValues = 
transformFunction.transformToFloatValuesMV(_projectionBlock);
+          for (int i = 0; i < NUM_ROWS; i++) {
+            Assert.assertEquals(floatValues[i], expectedDefaultValue);
+          }
+          break;
+        case DOUBLE:
+          double[][] doubleValues = 
transformFunction.transformToDoubleValuesMV(_projectionBlock);
+          for (int i = 0; i < NUM_ROWS; i++) {
+            Assert.assertEquals(doubleValues[i], expectedDefaultValue);
+          }
+          break;
+        case STRING:
+          String[][] stringValues = 
transformFunction.transformToStringValuesMV(_projectionBlock);
+          for (int i = 0; i < NUM_ROWS; i++) {
+            Assert.assertEquals(stringValues[i], expectedDefaultValue);
           }
           break;
         default:
@@ -326,31 +361,56 @@ public class JsonExtractIndexTransformFunctionTest extends BaseTransformFunction
     // With default value
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','INT',0)", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.INT, true
+            "$.noField"), "$.noField", DataType.INT, true, 0
     });
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','LONG',0)", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.LONG, true
+            "$.noField"), "$.noField", DataType.LONG, true, 0L
     });
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','FLOAT',0)", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.FLOAT, true
+            "$.noField"), "$.noField", DataType.FLOAT, true, (float) 0
     });
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','DOUBLE',0)", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.DOUBLE, true
+            "$.noField"), "$.noField", DataType.DOUBLE, true, (double) 0
     });
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','BIG_DECIMAL',0)", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.BIG_DECIMAL, true
+            "$.noField"), "$.noField", DataType.BIG_DECIMAL, true, new 
BigDecimal(0)
     });
     testArguments.add(new Object[]{
         String.format("jsonExtractIndex(%s,'%s','STRING','null')", 
JSON_STRING_SV_COLUMN,
-            "$.noField"), "$.noField", DataType.STRING, true
+            "$.noField"), "$.noField", DataType.STRING, true, "null"
     });
+    addMvDefaultValueTests(testArguments);
     return testArguments.toArray(new Object[0][]);
   }
 
+  private void addMvDefaultValueTests(List<Object[]> testArguments) {
+    testArguments.add(new Object[]{
+        String.format("jsonExtractIndex(%s,'%s','INT_ARRAY', '%s')", 
JSON_STRING_SV_COLUMN, "$.noField",
+            "[1, 2, 3]"), "$.noField", DataType.INT, false, new Integer[]{1, 
2, 3}
+    });
+    testArguments.add(new Object[]{
+        String.format("jsonExtractIndex(%s,'%s','LONG_ARRAY', '%s')", 
JSON_STRING_SV_COLUMN, "$.noField",
+            "[1, 5, 6]"), "$.noField", DataType.LONG, false, new Long[]{1L, 
5L, 6L}
+    });
+    testArguments.add(new Object[]{
+        String.format("jsonExtractIndex(%s,'%s','FLOAT_ARRAY', '%s')", 
JSON_STRING_SV_COLUMN, "$.noField",
+            "[1.2, 3.1, 1.6]"), "$.noField", DataType.FLOAT, false, new 
Float[]{1.2f, 3.1f, 1.6f}
+    });
+    testArguments.add(new Object[]{
+        String.format("jsonExtractIndex(%s,'%s','DOUBLE_ARRAY', '%s')", 
JSON_STRING_SV_COLUMN, "$.noField",
+            "[1.5, 3.4, 1.6]"), "$.noField", DataType.DOUBLE, false, new 
Double[]{1.5d, 3.4d, 1.6d}
+    });
+    testArguments.add(new Object[]{
+        String.format("jsonExtractIndex(%s,'%s','STRING_ARRAY', '%s')", 
JSON_STRING_SV_COLUMN, "$.noField",
+            "[\"randomString1\", \"randomString2\"]"), "$.noField", 
DataType.STRING, false,
+        new String[]{"randomString1", "randomString2"}
+    });
+  }
+
   // get value for key, excluding nested
   private String getValueForKey(String blob, JsonPath path) {
     Object out = JSON_PARSER_CONTEXT.parse(blob).read(path);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to