This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c2c887a  allow to extract values from array of objects with 
jsonPathArray (#7208)
c2c887a is described below

commit c2c887afe2445d61f97130e82b6fcaa5019d5174
Author: Xiaobing <61892277+klsi...@users.noreply.github.com>
AuthorDate: Wed Jul 28 12:33:58 2021 -0700

    allow to extract values from array of objects with jsonPathArray (#7208)
    
    Allow to extract values from array of objects with jsonPathArray.
    And add jsonPathArrayDefaultEmpty to allow skip json records without target 
path, instead of aborting the ingestion.
---
 .../common/function/scalar/JsonFunctions.java      |  48 ++++++++-
 .../pinot/common/function/JsonFunctionsTest.java   |  78 ++++++++++++++
 .../scalar/ArrayAwareJacksonJsonProviderTest.java  | 114 +++++++++++++++++++++
 3 files changed, 236 insertions(+), 4 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
index 37027ff..82536d6 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.common.function.scalar;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.annotations.VisibleForTesting;
 import com.jayway.jsonpath.Configuration;
 import com.jayway.jsonpath.JsonPath;
 import com.jayway.jsonpath.Option;
@@ -26,6 +27,7 @@ import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
 import com.jayway.jsonpath.spi.json.JsonProvider;
 import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
 import com.jayway.jsonpath.spi.mapper.MappingProvider;
+import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
@@ -48,7 +50,7 @@ import org.apache.pinot.spi.utils.JsonUtils;
 public class JsonFunctions {
   static {
     Configuration.setDefaults(new Configuration.Defaults() {
-      private final JsonProvider jsonProvider = new JacksonJsonProvider();
+      private final JsonProvider jsonProvider = new 
ArrayAwareJacksonJsonProvider();
       private final MappingProvider mappingProvider = new 
JacksonMappingProvider();
 
       @Override
@@ -109,12 +111,18 @@ public class JsonFunctions {
     if (object instanceof String) {
       return convertObjectToArray(JsonPath.read((String) object, jsonPath));
     }
-    if (object instanceof Object[]) {
-      return 
convertObjectToArray(JsonPath.read(JsonUtils.objectToString(object), jsonPath));
-    }
     return convertObjectToArray(JsonPath.read(object, jsonPath));
   }
 
+  @ScalarFunction
+  public static Object[] jsonPathArrayDefaultEmpty(Object object, String 
jsonPath) {
+    try {
+      return jsonPathArray(object, jsonPath);
+    } catch (Exception e) {
+      return new Object[0];
+    }
+  }
+
   private static Object[] convertObjectToArray(Object arrayObject) {
     if (arrayObject instanceof List) {
       return ((List) arrayObject).toArray();
@@ -199,4 +207,36 @@ public class JsonFunctions {
       return defaultValue;
     }
   }
+
+  @VisibleForTesting
+  static class ArrayAwareJacksonJsonProvider extends JacksonJsonProvider {
+    @Override
+    public boolean isArray(Object obj) {
+      return (obj instanceof List) || (obj instanceof Object[]);
+    }
+
+    @Override
+    public Object getArrayIndex(Object obj, int idx) {
+      if (obj instanceof Object[]) {
+        return ((Object[]) obj)[idx];
+      }
+      return super.getArrayIndex(obj, idx);
+    }
+
+    @Override
+    public int length(Object obj) {
+      if (obj instanceof Object[]) {
+        return ((Object[]) obj).length;
+      }
+      return super.length(obj);
+    }
+
+    @Override
+    public Iterable<?> toIterable(Object obj) {
+      if (obj instanceof Object[]) {
+        return Arrays.asList((Object[]) obj);
+      }
+      return super.toIterable(obj);
+    }
+  }
 }
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
index cbfe2e4..47f56d5 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
@@ -19,7 +19,9 @@
 package org.apache.pinot.common.function;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import com.jayway.jsonpath.PathNotFoundException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -28,6 +30,7 @@ import org.apache.pinot.common.function.scalar.JsonFunctions;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
 
 
 public class JsonFunctionsTest {
@@ -95,6 +98,81 @@ public class JsonFunctionsTest {
   }
 
   @Test
+  public void testJsonFunctionExtractingArrayWithMissingField()
+      throws JsonProcessingException {
+    String jsonString = "{\"name\": \"Pete\", \"age\": 24}";
+
+    try {
+      assertEquals(JsonFunctions.jsonPathArray(jsonString, 
"$.subjects[*].name"), new String[]{});
+      fail();
+    } catch (PathNotFoundException e) {
+      assertEquals(e.getMessage(), "Missing property in path $['subjects']");
+    }
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].name"), new String[]{});
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].grade"), new String[]{});
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].homework_grades"), new Object[]{});
+
+    // jsonPathArrayDefaultEmpty should work fine with existing fields.
+    jsonString = "{\n" +
+        "    \"name\": \"Pete\",\n" +
+        "    \"age\": 24,\n" +
+        "    \"subjects\": [\n" +
+        "        {\n" +
+        "            \"name\": \"maths\",\n" +
+        "            \"homework_grades\": [80, 85, 90, 95, 100],\n" +
+        "            \"grade\": \"A\"\n" +
+        "        },\n" +
+        "        {\n" +
+        "            \"name\": \"english\",\n" +
+        "            \"homework_grades\": [60, 65, 70, 85, 90],\n" +
+        "            \"grade\": \"B\"\n" +
+        "        }\n" +
+        "    ]\n" +
+        "}";
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].name"), new String[]{"maths", "english"});
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].grade"), new String[]{"A", "B"});
+    assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, 
"$.subjects[*].homework_grades"),
+        new Object[]{Arrays.asList(80, 85, 90, 95, 100), Arrays.asList(60, 65, 
70, 85, 90)});
+  }
+
+  @Test
+  public void testJsonFunctionExtractingArrayWithObjectArray()
+      throws JsonProcessingException {
+    // ImmutableList works fine with JsonPath with default 
JacksonJsonProvider. But on ingestion
+    // path, JSONRecordExtractor converts all Collections in parsed JSON 
object to Object[].
+    // Object[] doesn't work with default JsonPath, where "$.commits[*].sha" 
would return empty,
+    // and "$.commits[1].sha" led to exception `Filter: [1]['sha'] can only be 
applied to arrays`.
+    // Those failure could be reproduced by using the default 
JacksonJsonProvider for JsonPath.
+    Map<String, Object> rawData = ImmutableMap.of("commits",
+        ImmutableList.of(ImmutableMap.of("sha", 123, "name", "k"), 
ImmutableMap.of("sha", 456, "name", "j")));
+    assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new 
Integer[]{123, 456});
+    assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new 
Integer[]{456});
+
+    // ArrayAwareJacksonJsonProvider should fix this issue.
+    rawData = ImmutableMap.of("commits",
+        new Object[]{ImmutableMap.of("sha", 123, "name", "k"), 
ImmutableMap.of("sha", 456, "name", "j")});
+    assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new 
Integer[]{123, 456});
+    assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new 
Integer[]{456});
+  }
+
+  @Test
+  public void testJsonFunctionExtractingArrayWithTopLevelObjectArray()
+      throws JsonProcessingException {
+    // JSON formatted string works fine with JsonPath, and we used to 
serialize Object[]
+    // to JSON formatted string for JsonPath to work.
+    String rawDataInStr = "[{\"sha\": 123, \"name\": \"k\"}, {\"sha\": 456, 
\"name\": \"j\"}]";
+    assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[*].sha"), new 
Integer[]{123, 456});
+    assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[1].sha"), new 
Integer[]{456});
+
+    // ArrayAwareJacksonJsonProvider can work with Array directly, thus no 
need to serialize
+    // Object[] any more.
+    Object[] rawDataInAry =
+        new Object[]{ImmutableMap.of("sha", 123, "name", "kk"), 
ImmutableMap.of("sha", 456, "name", "jj")};
+    assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[*].sha"), new 
Integer[]{123, 456});
+    assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[1].sha"), new 
Integer[]{456});
+  }
+
+  @Test
   public void testJsonFunctionOnJsonArray()
       throws JsonProcessingException {
     String jsonArrayString =
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
new file mode 100644
index 0000000..3f4b71e
--- /dev/null
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.*;
+
+
+public class ArrayAwareJacksonJsonProviderTest {
+  @Test
+  public void testIsArray() {
+    JsonFunctions.ArrayAwareJacksonJsonProvider jp = new 
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+    assertFalse(jp.isArray(null));
+    assertTrue(jp.isArray(new Object[0]));
+    assertTrue(jp.isArray(new ArrayList<>(0)));
+    assertFalse(jp.isArray("I'm a list"));
+    assertFalse(jp.isArray(new HashMap<String, String>()));
+  }
+
+  @Test
+  public void testArrayLength() {
+    JsonFunctions.ArrayAwareJacksonJsonProvider jp = new 
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+    Object[] dataInAry = new Object[]{"123", "456"};
+    assertEquals(jp.length(dataInAry), 2);
+
+    List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+    assertEquals(jp.length(dataInList), 3);
+
+    try {
+      jp.length(null);
+      fail();
+    } catch (NullPointerException e) {
+      // It's supposed to get a JsonPathException, but JsonPath library 
actually
+      // has a bug leading to NullPointerException while creating the 
JsonPathException.
+      assertNull(e.getMessage());
+    }
+  }
+
+  @Test
+  public void testGetArrayIndex() {
+    JsonFunctions.ArrayAwareJacksonJsonProvider jp = new 
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+    Object[] dataInAry = new Object[]{"123", "456"};
+    for (int i = 0; i < dataInAry.length; i++) {
+      assertEquals(jp.getArrayIndex(dataInAry, i), dataInAry[i]);
+    }
+    try {
+      jp.getArrayIndex(dataInAry, dataInAry.length);
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      assertEquals(e.getMessage(), "Index 2 out of bounds for length 2");
+    }
+
+    List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+    for (int i = 0; i < dataInList.size(); i++) {
+      assertEquals(jp.getArrayIndex(dataInList, i), dataInList.get(i));
+    }
+    try {
+      jp.getArrayIndex(dataInList, dataInList.size());
+      fail();
+    } catch (IndexOutOfBoundsException e) {
+      assertEquals(e.getMessage(), "Index 3 out of bounds for length 3");
+    }
+  }
+
+  @Test
+  public void testToIterable() {
+    JsonFunctions.ArrayAwareJacksonJsonProvider jp = new 
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+    Object[] dataInAry = new Object[]{"123", "456"};
+    int idx = 0;
+    for (Object v : jp.toIterable(dataInAry)) {
+      assertEquals(v, dataInAry[idx++]);
+    }
+
+    List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+    idx = 0;
+    for (Object v : jp.toIterable(dataInList)) {
+      assertEquals(v, dataInList.get(idx++));
+    }
+
+    try {
+      jp.toIterable(null);
+      fail();
+    } catch (NullPointerException e) {
+      // It's supposed to get a JsonPathException, but JsonPath library 
actually
+      // has a bug leading to NullPointerException while creating the 
JsonPathException.
+      assertNull(e.getMessage());
+    }
+  }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to