This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new c2c887a allow to extract values from array of objects with jsonPathArray (#7208) c2c887a is described below commit c2c887afe2445d61f97130e82b6fcaa5019d5174 Author: Xiaobing <61892277+klsi...@users.noreply.github.com> AuthorDate: Wed Jul 28 12:33:58 2021 -0700 allow to extract values from array of objects with jsonPathArray (#7208) Allow to extract values from array of objects with jsonPathArray. And add jsonPathArrayDefaultEmpty to allow skip json records without target path, instead of aborting the ingestion. --- .../common/function/scalar/JsonFunctions.java | 48 ++++++++- .../pinot/common/function/JsonFunctionsTest.java | 78 ++++++++++++++ .../scalar/ArrayAwareJacksonJsonProviderTest.java | 114 +++++++++++++++++++++ 3 files changed, 236 insertions(+), 4 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java index 37027ff..82536d6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java @@ -19,6 +19,7 @@ package org.apache.pinot.common.function.scalar; import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.annotations.VisibleForTesting; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.Option; @@ -26,6 +27,7 @@ import com.jayway.jsonpath.spi.json.JacksonJsonProvider; import com.jayway.jsonpath.spi.json.JsonProvider; import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; import com.jayway.jsonpath.spi.mapper.MappingProvider; +import java.util.Arrays; import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -48,7 +50,7 @@ import org.apache.pinot.spi.utils.JsonUtils; public class JsonFunctions { static { Configuration.setDefaults(new Configuration.Defaults() { - private final JsonProvider jsonProvider = new JacksonJsonProvider(); + private final JsonProvider jsonProvider = new ArrayAwareJacksonJsonProvider(); private final MappingProvider mappingProvider = new JacksonMappingProvider(); @Override @@ -109,12 +111,18 @@ public class JsonFunctions { if (object instanceof String) { return convertObjectToArray(JsonPath.read((String) object, jsonPath)); } - if (object instanceof Object[]) { - return convertObjectToArray(JsonPath.read(JsonUtils.objectToString(object), jsonPath)); - } return convertObjectToArray(JsonPath.read(object, jsonPath)); } + @ScalarFunction + public static Object[] jsonPathArrayDefaultEmpty(Object object, String jsonPath) { + try { + return jsonPathArray(object, jsonPath); + } catch (Exception e) { + return new Object[0]; + } + } + private static Object[] convertObjectToArray(Object arrayObject) { if (arrayObject instanceof List) { return ((List) arrayObject).toArray(); @@ -199,4 +207,36 @@ public class JsonFunctions { return defaultValue; } } + + @VisibleForTesting + static class ArrayAwareJacksonJsonProvider extends JacksonJsonProvider { + @Override + public boolean isArray(Object obj) { + return (obj instanceof List) || (obj instanceof Object[]); + } + + @Override + public Object getArrayIndex(Object obj, int idx) { + if (obj instanceof Object[]) { + return ((Object[]) obj)[idx]; + } + return super.getArrayIndex(obj, idx); + } + + @Override + public int length(Object obj) { + if (obj instanceof Object[]) { + return ((Object[]) obj).length; + } + return super.length(obj); + } + + @Override + public Iterable<?> toIterable(Object obj) { + if (obj instanceof Object[]) { + return Arrays.asList((Object[]) obj); + } + return super.toIterable(obj); + } + } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java index cbfe2e4..47f56d5 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java @@ -19,7 +19,9 @@ package org.apache.pinot.common.function; import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.jayway.jsonpath.PathNotFoundException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -28,6 +30,7 @@ import org.apache.pinot.common.function.scalar.JsonFunctions; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; public class JsonFunctionsTest { @@ -95,6 +98,81 @@ public class JsonFunctionsTest { } @Test + public void testJsonFunctionExtractingArrayWithMissingField() + throws JsonProcessingException { + String jsonString = "{\"name\": \"Pete\", \"age\": 24}"; + + try { + assertEquals(JsonFunctions.jsonPathArray(jsonString, "$.subjects[*].name"), new String[]{}); + fail(); + } catch (PathNotFoundException e) { + assertEquals(e.getMessage(), "Missing property in path $['subjects']"); + } + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].name"), new String[]{}); + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].grade"), new String[]{}); + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].homework_grades"), new Object[]{}); + + // jsonPathArrayDefaultEmpty should work fine with existing fields. + jsonString = "{\n" + + " \"name\": \"Pete\",\n" + + " \"age\": 24,\n" + + " \"subjects\": [\n" + + " {\n" + + " \"name\": \"maths\",\n" + + " \"homework_grades\": [80, 85, 90, 95, 100],\n" + + " \"grade\": \"A\"\n" + + " },\n" + + " {\n" + + " \"name\": \"english\",\n" + + " \"homework_grades\": [60, 65, 70, 85, 90],\n" + + " \"grade\": \"B\"\n" + + " }\n" + + " ]\n" + + "}"; + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].name"), new String[]{"maths", "english"}); + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].grade"), new String[]{"A", "B"}); + assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString, "$.subjects[*].homework_grades"), + new Object[]{Arrays.asList(80, 85, 90, 95, 100), Arrays.asList(60, 65, 70, 85, 90)}); + } + + @Test + public void testJsonFunctionExtractingArrayWithObjectArray() + throws JsonProcessingException { + // ImmutableList works fine with JsonPath with default JacksonJsonProvider. But on ingestion + // path, JSONRecordExtractor converts all Collections in parsed JSON object to Object[]. + // Object[] doesn't work with default JsonPath, where "$.commits[*].sha" would return empty, + // and "$.commits[1].sha" led to exception `Filter: [1]['sha'] can only be applied to arrays`. + // Those failure could be reproduced by using the default JacksonJsonProvider for JsonPath. + Map<String, Object> rawData = ImmutableMap.of("commits", + ImmutableList.of(ImmutableMap.of("sha", 123, "name", "k"), ImmutableMap.of("sha", 456, "name", "j"))); + assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new Integer[]{123, 456}); + assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new Integer[]{456}); + + // ArrayAwareJacksonJsonProvider should fix this issue. + rawData = ImmutableMap.of("commits", + new Object[]{ImmutableMap.of("sha", 123, "name", "k"), ImmutableMap.of("sha", 456, "name", "j")}); + assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new Integer[]{123, 456}); + assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new Integer[]{456}); + } + + @Test + public void testJsonFunctionExtractingArrayWithTopLevelObjectArray() + throws JsonProcessingException { + // JSON formatted string works fine with JsonPath, and we used to serialize Object[] + // to JSON formatted string for JsonPath to work. + String rawDataInStr = "[{\"sha\": 123, \"name\": \"k\"}, {\"sha\": 456, \"name\": \"j\"}]"; + assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[*].sha"), new Integer[]{123, 456}); + assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[1].sha"), new Integer[]{456}); + + // ArrayAwareJacksonJsonProvider can work with Array directly, thus no need to serialize + // Object[] any more. + Object[] rawDataInAry = + new Object[]{ImmutableMap.of("sha", 123, "name", "kk"), ImmutableMap.of("sha", 456, "name", "jj")}; + assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[*].sha"), new Integer[]{123, 456}); + assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[1].sha"), new Integer[]{456}); + } + + @Test public void testJsonFunctionOnJsonArray() throws JsonProcessingException { String jsonArrayString = diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java new file mode 100644 index 0000000..3f4b71e --- /dev/null +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.function.scalar; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import org.testng.annotations.Test; + +import static org.testng.Assert.*; + + +public class ArrayAwareJacksonJsonProviderTest { + @Test + public void testIsArray() { + JsonFunctions.ArrayAwareJacksonJsonProvider jp = new JsonFunctions.ArrayAwareJacksonJsonProvider(); + + assertFalse(jp.isArray(null)); + assertTrue(jp.isArray(new Object[0])); + assertTrue(jp.isArray(new ArrayList<>(0))); + assertFalse(jp.isArray("I'm a list")); + assertFalse(jp.isArray(new HashMap<String, String>())); + } + + @Test + public void testArrayLength() { + JsonFunctions.ArrayAwareJacksonJsonProvider jp = new JsonFunctions.ArrayAwareJacksonJsonProvider(); + + Object[] dataInAry = new Object[]{"123", "456"}; + assertEquals(jp.length(dataInAry), 2); + + List<Object> dataInList = Arrays.asList("abc", "efg", "hij"); + assertEquals(jp.length(dataInList), 3); + + try { + jp.length(null); + fail(); + } catch (NullPointerException e) { + // It's supposed to get a JsonPathException, but JsonPath library actually + // has a bug leading to NullPointerException while creating the JsonPathException. + assertNull(e.getMessage()); + } + } + + @Test + public void testGetArrayIndex() { + JsonFunctions.ArrayAwareJacksonJsonProvider jp = new JsonFunctions.ArrayAwareJacksonJsonProvider(); + + Object[] dataInAry = new Object[]{"123", "456"}; + for (int i = 0; i < dataInAry.length; i++) { + assertEquals(jp.getArrayIndex(dataInAry, i), dataInAry[i]); + } + try { + jp.getArrayIndex(dataInAry, dataInAry.length); + fail(); + } catch (IndexOutOfBoundsException e) { + assertEquals(e.getMessage(), "Index 2 out of bounds for length 2"); + } + + List<Object> dataInList = Arrays.asList("abc", "efg", "hij"); + for (int i = 0; i < dataInList.size(); i++) { + assertEquals(jp.getArrayIndex(dataInList, i), dataInList.get(i)); + } + try { + jp.getArrayIndex(dataInList, dataInList.size()); + fail(); + } catch (IndexOutOfBoundsException e) { + assertEquals(e.getMessage(), "Index 3 out of bounds for length 3"); + } + } + + @Test + public void testToIterable() { + JsonFunctions.ArrayAwareJacksonJsonProvider jp = new JsonFunctions.ArrayAwareJacksonJsonProvider(); + + Object[] dataInAry = new Object[]{"123", "456"}; + int idx = 0; + for (Object v : jp.toIterable(dataInAry)) { + assertEquals(v, dataInAry[idx++]); + } + + List<Object> dataInList = Arrays.asList("abc", "efg", "hij"); + idx = 0; + for (Object v : jp.toIterable(dataInList)) { + assertEquals(v, dataInList.get(idx++)); + } + + try { + jp.toIterable(null); + fail(); + } catch (NullPointerException e) { + // It's supposed to get a JsonPathException, but JsonPath library actually + // has a bug leading to NullPointerException while creating the JsonPathException. + assertNull(e.getMessage()); + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org