This is an automated email from the ASF dual-hosted git repository. gortiz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 013e80f968 Fix long value parsing in jsonextractscalar (#14337) 013e80f968 is described below commit 013e80f96802936b5e63305cbc4c9c9ab0dd62a4 Author: Bolek Ziobrowski <26925920+bziobrow...@users.noreply.github.com> AuthorDate: Tue Nov 12 15:58:42 2024 +0100 Fix long value parsing in jsonextractscalar (#14337) --- .../JsonExtractScalarTransformFunction.java | 16 +- .../org/apache/pinot/core/util/NumberUtils.java | 230 +++++++++++++++++++++ .../apache/pinot/core/util/NumericException.java | 31 +++ .../JsonExtractScalarTransformFunctionTest.java | 45 ++++ .../apache/pinot/core/util/NumberUtilsTest.java | 138 +++++++++++++ .../apache/pinot/queries/BaseJsonQueryTest.java | 5 + .../pinot/queries/JsonExtractScalarTest.java | 12 ++ .../org/apache/pinot/queries/QueriesTestUtils.java | 12 +- 8 files changed, 485 insertions(+), 4 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java index 35249e475b..783ec02322 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java @@ -34,6 +34,8 @@ import org.apache.pinot.common.function.JsonPathCache; import org.apache.pinot.core.operator.ColumnContext; import org.apache.pinot.core.operator.blocks.ValueBlock; import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.core.util.NumberUtils; +import org.apache.pinot.core.util.NumericException; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.JsonUtils; @@ -120,6 +122,10 @@ public class JsonExtractScalarTransformFunction extends BaseTransformFunction { @Override public int[] transformToIntValuesSV(ValueBlock valueBlock) { + if (_resultMetadata.getDataType().getStoredType() != DataType.INT) { + return super.transformToIntValuesSV(valueBlock); + } + initIntValuesSV(valueBlock.getNumDocs()); IntFunction<Object> resultExtractor = getResultExtractor(valueBlock); int defaultValue = 0; @@ -156,6 +162,9 @@ public class JsonExtractScalarTransformFunction extends BaseTransformFunction { @Override public long[] transformToLongValuesSV(ValueBlock valueBlock) { + if (_resultMetadata.getDataType().getStoredType() != DataType.LONG) { + return super.transformToLongValuesSV(valueBlock); + } initLongValuesSV(valueBlock.getNumDocs()); IntFunction<Object> resultExtractor = getResultExtractor(valueBlock); long defaultValue = 0; @@ -184,8 +193,11 @@ public class JsonExtractScalarTransformFunction extends BaseTransformFunction { if (result instanceof Number) { _longValuesSV[i] = ((Number) result).longValue(); } else { - // Handle scientific notation - _longValuesSV[i] = (long) Double.parseDouble(result.toString()); + try { + _longValuesSV[i] = NumberUtils.parseJsonLong(result.toString()); + } catch (NumericException nfe) { + throw new NumberFormatException("For input string: \"" + result + "\""); + } } } return _longValuesSV; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/util/NumberUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/util/NumberUtils.java new file mode 100644 index 0000000000..89f1f64e19 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/util/NumberUtils.java @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.util; + +/** + * Utility class with various number related methods. + */ +public class NumberUtils { + + public static final NumericException NULL_EXCEPTION = new NumericException("Can't parse null string"); + public static final NumericException EXP_EXCEPTION = new NumericException("Wrong exponent"); + + private static final long[] POWERS_OF_10 = new long[]{ + 1L, + 10L, + 100L, + 1000L, + 10000L, + 100000L, + 1000000L, + 10000000L, + 100000000L, + 1000000000L, + 10000000000L, + 100000000000L, + 1000000000000L, + 10000000000000L, + 100000000000000L, + 1000000000000000L, + 10000000000000000L, + 100000000000000000L, + 1000000000000000000L, + }; + + private NumberUtils() { + } + + /** + * Parses whole input char sequence. + * Throws static, pre-allocated NumericException. + * If proper stack trace is required, caller has to catch it and throw another exception. + * @param cs char sequence to parse + * @return parsed long value + */ + public static long parseLong(CharSequence cs) + throws NumericException { + if (cs == null) { + throw NULL_EXCEPTION; + } + return parseLong(cs, 0, cs.length()); + } + + /** + * Parses input char sequence between given indices. + * Throws static, pre-allocated NumericException. + * If proper stack trace is required, caller has to catch it and throw another exception. + * @param cs char sequence to parse + * @param start start index (inclusive) + * @param end end index (exclusive) + * @return parsed long value + */ + public static long parseLong(CharSequence cs, int start, int end) + throws NumericException { + if (cs == null) { + throw NULL_EXCEPTION; + } + + boolean negative = false; + int i = start; + long limit = -Long.MAX_VALUE; + + if (end > start) { + char firstChar = cs.charAt(start); + if (firstChar < '0') { // Possible leading "+" or "-" + if (firstChar == '-') { + negative = true; + limit = Long.MIN_VALUE; + } else if (firstChar != '+') { + throw NumericException.INSTANCE; + } + + if (end == start + 1) { // Cannot have lone "+" or "-" + throw NumericException.INSTANCE; + } + i++; + } + long multmin = limit / 10; + long result = 0; + while (i < end) { + // Accumulating negatively avoids surprises near MAX_VALUE + char c = cs.charAt(i++); + if (c < '0' || c > '9' || result < multmin) { + throw NumericException.INSTANCE; + } + + int digit = c - '0'; + result *= 10; + if (result < limit + digit) { + throw NumericException.INSTANCE; + } + result -= digit; + } + return negative ? result : -result; + } else { + throw NumericException.INSTANCE; + } + } + + /** + * Parses input accepting regular long syntax plus: + * 1E1 -> 10 + * 1.234 -> 1 + * 1.123E1 -> 11 + * @param cs - char sequence to parse + * @return parsed long value + */ + public static long parseJsonLong(CharSequence cs) + throws NumericException { + if (cs == null) { + throw NULL_EXCEPTION; + } + + boolean negative = false; + int i = 0; + int len = cs.length(); + long limit = -Long.MAX_VALUE; + + if (len > 0) { + boolean dotFound = false; + boolean exponentFound = false; + + char firstChar = cs.charAt(0); + if (firstChar < '0') { // Possible leading "+" or "-" + if (firstChar == '-') { + negative = true; + limit = Long.MIN_VALUE; + } else if (firstChar != '+') { + throw NumericException.INSTANCE; + } + + if (len == 1) { // Cannot have lone "+" or "-" + throw NumericException.INSTANCE; + } + i++; + } + long multmin = limit / 10; + long result = 0; + while (i < len) { + // Accumulating negatively avoids surprises near MAX_VALUE + char c = cs.charAt(i++); + if (c < '0' || c > '9' || result < multmin) { + if (c == '.') { + //ignore the rest of sequence + dotFound = true; + break; + } else if (c == 'e' || c == 'E') { + exponentFound = true; + break; + } + throw NumericException.INSTANCE; + } + + int digit = c - '0'; + result *= 10; + if (result < limit + digit) { + throw NumericException.INSTANCE; + } + result -= digit; + } + + if (dotFound) { + //scan rest of the string to make sure it's only digits + while (i < len) { + char c = cs.charAt(i++); + if (c < '0' || c > '9') { + if ((c | 32) == 'e') { + exponentFound = true; + break; + } else { + throw NumericException.INSTANCE; + } + } + } + } + + if (exponentFound) { + if (dotFound) { + try { // TODO: remove toString() + return (long) Double.parseDouble(cs.toString()); + } catch (NumberFormatException ne) { + throw NumericException.INSTANCE; + } + } + + long exp; + try { + exp = parseLong(cs, i, len); + } catch (NumericException nfe) { + throw EXP_EXCEPTION; + } + + if (exp < 0 || exp > POWERS_OF_10.length) { + throw EXP_EXCEPTION; + } + + return (negative ? result : -result) * POWERS_OF_10[(int) exp]; + } + + return negative ? result : -result; + } else { + throw NumericException.INSTANCE; + } + } +} diff --git a/pinot-core/src/main/java/org/apache/pinot/core/util/NumericException.java b/pinot-core/src/main/java/org/apache/pinot/core/util/NumericException.java new file mode 100644 index 0000000000..427bbc4209 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/util/NumericException.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.util; + +public class NumericException extends Exception { + + public static final NumericException INSTANCE = new NumericException(); + + public NumericException() { + } + + public NumericException(String message) { + super(message); + } +} diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java index 12effbf8a5..57a8ae04d9 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java @@ -42,11 +42,13 @@ public class JsonExtractScalarTransformFunctionTest extends BaseTransformFunctio public void testJsonPathTransformFunction(String expressionStr, DataType resultsDataType, boolean isSingleValue) { ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction); Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME); Assert.assertEquals(transformFunction.getResultMetadata().getDataType(), resultsDataType); Assert.assertEquals(transformFunction.getResultMetadata().isSingleValue(), isSingleValue); + if (isSingleValue) { switch (resultsDataType) { case INT: @@ -105,6 +107,49 @@ public class JsonExtractScalarTransformFunctionTest extends BaseTransformFunctio } } + @Test + public void testExtractWithScalarTypeMismatch() { + ExpressionContext expression = RequestContextUtils.getExpression("jsonExtractScalar(json,'$.stringSV','DOUBLE')"); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + + Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction); + Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME); + + long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(longValues[i], (long) Double.parseDouble(_stringSVValues[i])); + } + + int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(intValues[i], (int) Double.parseDouble(_stringSVValues[i])); + } + + float[] floatValues = transformFunction.transformToFloatValuesSV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(floatValues[i], Float.parseFloat(_stringSVValues[i])); + } + + BigDecimal[] bdValues = transformFunction.transformToBigDecimalValuesSV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(bdValues[i], new BigDecimal(_stringSVValues[i])); + } + } + + @Test + public void testExtractWithScalarStringToLong() { + ExpressionContext expression = RequestContextUtils.getExpression("jsonExtractScalar(json,'$.stringSV','LONG')"); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + + Assert.assertTrue(transformFunction instanceof JsonExtractScalarTransformFunction); + Assert.assertEquals(transformFunction.getName(), JsonExtractScalarTransformFunction.FUNCTION_NAME); + + long[] longValues = transformFunction.transformToLongValuesSV(_projectionBlock); + for (int i = 0; i < NUM_ROWS; i++) { + Assert.assertEquals(longValues[i], (long) Double.parseDouble(_stringSVValues[i])); + } + } + @DataProvider(name = "testJsonPathTransformFunction") public Object[][] testJsonPathTransformFunctionDataProvider() { List<Object[]> testArguments = new ArrayList<>(); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/NumberUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/NumberUtilsTest.java new file mode 100644 index 0000000000..68b59a2497 --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/core/util/NumberUtilsTest.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.util; + +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class NumberUtilsTest { + + @Test + public void testParseLong() { + assertLong("0", 0); + assertLong("-1", -1); + assertLong("-1000000", -1000000); + assertLong("-1223372036854775808", -1223372036854775808L); + assertLong("-9223372036854775808", Long.MIN_VALUE); + assertLong("9223372036854775807", Long.MAX_VALUE); + assertLong("1223372036854775807", 1223372036854775807L); + + assertLongError(null); + assertLongError(""); + assertLongError("q"); + assertLongError("--1"); + assertLongError("++1"); + assertLongError("+1+"); + assertLongError("+1-"); + assertLongError("+1."); + assertLongError("1."); + assertLongError("1e"); + assertLongError("1E"); + assertLongError("9223372036854775808"); + assertLongError("19223372036854775808"); + assertLongError("-9223372036854775809"); + assertLongError("-19223372036854775808"); + } + + @Test + public void testParseJsonLong() { + assertJsonLong("0", 0); + assertJsonLong("-1", -1); + assertJsonLong("-1000000", -1000000); + assertJsonLong("-1223372036854775808", -1223372036854775808L); + assertJsonLong("-9223372036854775808", Long.MIN_VALUE); + assertJsonLong("9223372036854775807", Long.MAX_VALUE); + assertJsonLong("1223372036854775807", 1223372036854775807L); + + // fp literals + assertJsonLong("0.0", 0); + assertJsonLong("-0.0", 0); + assertJsonLong("1.", 1); + assertJsonLong("+1.", 1); + assertJsonLong("0.12345678", 0); + assertJsonLong("-1000000.12345678", -1000000); + assertJsonLong("1000000.12345678", 1000000); + assertJsonLong("-9223372036854775808.123456", Long.MIN_VALUE); + assertJsonLong("9223372036854775807.123456", Long.MAX_VALUE); + + // with exponent + assertJsonLong("2e0", 2L); + assertJsonLong("2e1", 20L); + assertJsonLong("2e2", 200L); + assertJsonLong("1e10", 10000000000L); + assertJsonLong("1e15", 1000000000000000L); + assertJsonLong("1.1e10", 11000000000L); + assertJsonLong("1.1E10", 11000000000L); + + assertJsonLongError(null); + assertJsonLongError(""); + assertJsonLongError("q"); + assertJsonLongError("--1"); + assertJsonLongError("++1"); + assertJsonLongError("+1+"); + assertJsonLongError("+1-"); + assertJsonLongError("1e"); + assertJsonLongError("1E"); + assertJsonLongError("9223372036854775808"); + assertJsonLongError("19223372036854775808"); + assertJsonLongError("-9223372036854775809"); + assertJsonLongError("-19223372036854775808"); + + // fp literals + assertJsonLongError("1.Q"); + assertJsonLongError("1.."); + assertJsonLongError("1.1."); + assertJsonLongError("1.1e"); + assertJsonLongError("1e"); + assertJsonLongError("1ee"); + + //with exponent + assertJsonLongError("2E+"); + assertJsonLongError("2E-"); + assertJsonLongError("2E20"); + assertJsonLongError("2E100"); + assertJsonLongError("2E100.123"); + assertJsonLongError("2E-1"); + } + + private void assertLong(String input, long expected) { + try { + Assert.assertEquals(NumberUtils.parseLong(input), expected); + } catch (NumericException nfe) { + Assert.fail("Can't parse " + input); + } + } + + private void assertLongError(String input) { + Assert.assertThrows(NumericException.class, () -> NumberUtils.parseLong(input)); + } + + private void assertJsonLong(String input, long expected) { + try { + Assert.assertEquals(NumberUtils.parseJsonLong(input), expected); + } catch (NumericException nfe) { + Assert.fail("Can't parse " + input); + } + } + + private void assertJsonLongError(String input) { + Assert.assertThrows(NumericException.class, () -> NumberUtils.parseJsonLong(input)); + } +} diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/BaseJsonQueryTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/BaseJsonQueryTest.java index d2cefa9475..6ef6e89d66 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/BaseJsonQueryTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/BaseJsonQueryTest.java @@ -141,6 +141,11 @@ public abstract class BaseJsonQueryTest extends BaseQueriesTest { "{\"name\": {\"first\": \"multi-dimensional-1\",\"last\": \"array\"},\"days\": 111}")); records.add(createRecord(14, 14, "top level array", "[{\"i1\":1,\"i2\":2}, {\"i1\":3,\"i2\":4}]")); + records.add(createRecord(15, 15, "john doe", "{\"longVal\": \"9223372036854775807\"}")); + records.add(createRecord(16, 16, "john doe", "{\"longVal\": \"-9223372036854775808\" }")); + records.add(createRecord(17, 17, "john doe", "{\"longVal\": \"-100.12345\" }")); + records.add(createRecord(18, 18, "john doe", "{\"longVal\": \"10e2\" }")); + tableConfig.getIndexingConfig().setJsonIndexColumns(List.of("jsonColumn")); SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(tableConfig, schema); segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonExtractScalarTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonExtractScalarTest.java index baec8db4d6..8af1349096 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonExtractScalarTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonExtractScalarTest.java @@ -159,4 +159,16 @@ public class JsonExtractScalarTest extends BaseJsonQueryTest { Object[][] expecteds3 = {{176.0}}; checkResult("SELECT MAX(" + column + ".id - 5) FROM testTable", expecteds3); } + + @Test(dataProvider = "allJsonColumns") + public void testExtractJsonLongValue(String column) { + // test fails, actual number returned is 1790416068515225856 + checkResult("SELECT intColumn, jsonextractscalar(" + column + ", '$.longVal', 'LONG', 0) " + + "FROM testTable " + + "where intColumn >= 15 and intColumn <= 18 " + + "group by 1, 2 " + + "order by 1, 2 " + + "limit 4", + new Object[][]{{15, Long.MAX_VALUE}, {16, Long.MIN_VALUE}, {17, -100L}, {18, 1000L}}); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java b/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java index f98ae81afd..2d0e709964 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java @@ -160,14 +160,22 @@ public class QueriesTestUtils { for (int i = 0; i < actual.size(); i++) { // NOTE: Do not use 'assertEquals(actual.get(i), expected.get(i))' because for array within the row, it only // compares the reference of the array, instead of the content of the array. - validateRow(actual.get(i), expected.get(i)); + try { + validateRow(actual.get(i), expected.get(i)); + } catch (AssertionError ae) { + throw new AssertionError("Row: " + i + " " + ae.getMessage(), ae); + } } } private static void validateRow(Object[] actual, Object[] expected) { assertEquals(actual.length, expected.length); for (int i = 0; i < actual.length; i++) { - assertEquals(actual[i], expected[i]); + try { + assertEquals(actual[i], expected[i]); + } catch (AssertionError ae) { + throw new AssertionError("column:" + i + " " + ae.getMessage(), ae); + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org