This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new f2153a1382b Handle null values in JsonExtractScalarTransformFunction
with default value support (#16683)
f2153a1382b is described below
commit f2153a1382b52b6c3f4d479e72cc7607934736d0
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Tue Aug 26 13:42:20 2025 +0200
Handle null values in JsonExtractScalarTransformFunction with default value
support (#16683)
---
.../JsonExtractScalarTransformFunction.java | 60 ++++++++-
.../JsonExtractScalarTransformFunctionTest.java | 136 +++++++++++++++++++++
2 files changed, 191 insertions(+), 5 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
index 783ec023227..f171d1aed2a 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
@@ -361,7 +361,17 @@ public class JsonExtractScalarTransformFunction extends
BaseTransformFunction {
int numValues = result.size();
int[] values = new int[numValues];
for (int j = 0; j < numValues; j++) {
- values[j] = result.get(j);
+ Integer value = result.get(j);
+ if (value == null) {
+ if (_defaultValue != null) {
+ value = ((Number) _defaultValue).intValue();
+ } else {
+ throw new IllegalArgumentException(
+ "At least one of the resolved JSON arrays include nulls, which
is not supported in Pinot. "
+ + "Consider setting a default value as the fourth argument
of json_extract_scalar.");
+ }
+ }
+ values[j] = value;
}
_intValuesMV[i] = values;
}
@@ -386,7 +396,17 @@ public class JsonExtractScalarTransformFunction extends
BaseTransformFunction {
int numValues = result.size();
long[] values = new long[numValues];
for (int j = 0; j < numValues; j++) {
- values[j] = result.get(j);
+ Long value = result.get(j);
+ if (value == null) {
+ if (_defaultValue != null) {
+ value = ((Number) _defaultValue).longValue();
+ } else {
+ throw new IllegalArgumentException(
+ "At least one of the resolved JSON arrays include nulls, which
is not supported in Pinot. "
+ + "Consider setting a default value as the fourth argument
of json_extract_scalar.");
+ }
+ }
+ values[j] = value;
}
_longValuesMV[i] = values;
}
@@ -411,7 +431,17 @@ public class JsonExtractScalarTransformFunction extends
BaseTransformFunction {
int numValues = result.size();
float[] values = new float[numValues];
for (int j = 0; j < numValues; j++) {
- values[j] = result.get(j);
+ Float value = result.get(j);
+ if (value == null) {
+ if (_defaultValue != null) {
+ value = ((Number) _defaultValue).floatValue();
+ } else {
+ throw new IllegalArgumentException(
+ "At least one of the resolved JSON arrays include nulls, which
is not supported in Pinot. "
+ + "Consider setting a default value as the fourth argument
of json_extract_scalar.");
+ }
+ }
+ values[j] = value;
}
_floatValuesMV[i] = values;
}
@@ -436,7 +466,17 @@ public class JsonExtractScalarTransformFunction extends
BaseTransformFunction {
int numValues = result.size();
double[] values = new double[numValues];
for (int j = 0; j < numValues; j++) {
- values[j] = result.get(j);
+ Double value = result.get(j);
+ if (value == null) {
+ if (_defaultValue != null) {
+ value = ((Number) _defaultValue).doubleValue();
+ } else {
+ throw new IllegalArgumentException(
+ "At least one of the resolved JSON arrays include nulls, which
is not supported in Pinot. "
+ + "Consider setting a default value as the fourth argument
of json_extract_scalar.");
+ }
+ }
+ values[j] = value;
}
_doubleValuesMV[i] = values;
}
@@ -461,7 +501,17 @@ public class JsonExtractScalarTransformFunction extends
BaseTransformFunction {
int numValues = result.size();
String[] values = new String[numValues];
for (int j = 0; j < numValues; j++) {
- values[j] = result.get(j);
+ String value = result.get(j);
+ if (value == null) {
+ if (_defaultValue != null) {
+ value = _defaultValue.toString();
+ } else {
+ throw new IllegalArgumentException(
+ "At least one of the resolved JSON arrays include nulls, which
is not supported in Pinot. "
+ + "Consider setting a default value as the fourth argument
of json_extract_scalar.");
+ }
+ }
+ values[j] = value;
}
_stringValuesMV[i] = values;
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
index c65a4295fe5..0eaaf52b259 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
@@ -18,26 +18,40 @@
*/
package org.apache.pinot.core.operator.transform.function;
+import java.io.File;
import java.io.IOException;
+import java.io.UncheckedIOException;
import java.math.BigDecimal;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
+import org.apache.commons.io.FileUtils;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.exception.BadQueryRequestException;
import org.apache.pinot.spi.utils.JsonUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.apache.pinot.sql.parsers.SqlCompilationException;
+import org.assertj.core.api.Assertions;
import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class JsonExtractScalarTransformFunctionTest extends
BaseTransformFunctionTest {
+ protected File _baseDir;
+
@Test(dataProvider = "testJsonPathTransformFunction")
public void testJsonPathTransformFunction(String expressionStr, DataType
resultsDataType, boolean isSingleValue) {
ExpressionContext expression =
RequestContextUtils.getExpression(expressionStr);
@@ -517,4 +531,126 @@ public class JsonExtractScalarTransformFunctionTest
extends BaseTransformFunctio
new Object[]{String.format("json_extract_key(%s, \"$.*\")",
JSON_COLUMN)}};
//@formatter:on
}
+
+ @BeforeClass
+ void createBaseDir() {
+ try {
+ _baseDir =
Files.createTempDirectory(getClass().getSimpleName()).toFile();
+ } catch (IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ }
+
+ @AfterClass
+ void destroyBaseDir()
+ throws IOException {
+ if (_baseDir != null) {
+ FileUtils.deleteDirectory(_baseDir);
+ }
+ }
+
+ @DataProvider
+ public static Object[][] multiValuesAndDefaults() {
+ return new Object[][]{
+ {"INT_ARRAY", "-123"},
+ {"LONG_ARRAY", "-123"},
+ {"FLOAT_ARRAY", "0.5"},
+ {"DOUBLE_ARRAY", "0.5"},
+ {"STRING_ARRAY", "'default'"}
+ };
+ }
+
+ /**
+ * Checks that extracting a multi-value type from a JSON array with nulls fails without a default.
+ * @param resultsType The result type used in the query: 'INT_ARRAY', 'LONG_ARRAY',
'FLOAT_ARRAY', 'DOUBLE_ARRAY' or 'STRING_ARRAY'
+ * @param notUsed Not used, just here to be able to use the same provider as
+ * {@link #mvWithNullsWithDefault(String, String)}
+ */
+ @Test(dataProvider = "multiValuesAndDefaults")
+ public void mvWithNullsWithoutDefault(String resultsType, String notUsed) {
+ // schema with a single column called "json" of type JSON
+ Schema schema = new Schema.SchemaBuilder()
+ .setSchemaName("testTable")
+ .setEnableColumnBasedNullHandling(true)
+ .addDimensionField("json", DataType.JSON)
+ .build();
+ // trivial table config
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName("testTable")
+ .build();
+ try {
+ FluentQueryTest.withBaseDir(_baseDir)
+ .withNullHandling(false)
+ .givenTable(schema, tableConfig)
+ .onFirstInstance(new Object[]{"{\"name\": [null]}"})
+ .whenQuery("SELECT jsonExtractScalar(json, '$.name', '" +
resultsType + "') FROM testTable")
+ // TODO: Change the framework so it does not duplicate segments when only
one segment is used
+ .thenResultIs(new Object[]{"doesn't matter, it should fail"});
+ } catch (AssertionError e) {
+ Assertions.assertThat(e.getMessage())
+ .describedAs("Expected to fail because the JSON array contains
nulls")
+ .contains("At least one of the resolved JSON arrays include nulls");
+ }
+ }
+
+ /**
+ * Checks that nulls in the extracted JSON array are replaced by the supplied default value.
+ * @param resultsType The result type used in the query: 'INT_ARRAY', 'LONG_ARRAY',
'FLOAT_ARRAY', 'DOUBLE_ARRAY' or 'STRING_ARRAY'
+ * @param defaultValSql The default value to use in the query, as passed to
jsonExtractScalar as its fourth argument
+ */
+ @Test(dataProvider = "multiValuesAndDefaults")
+ public void mvWithNullsWithDefault(String resultsType, String defaultValSql)
{
+ // schema with a single column called "json" of type JSON
+ Schema schema = new Schema.SchemaBuilder()
+ .setSchemaName("testTable")
+ .setEnableColumnBasedNullHandling(true)
+ .addDimensionField("json", DataType.JSON)
+ .build();
+ // trivial table config
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+ .setTableName("testTable")
+ .build();
+
+ Object[] expectedRow;
+ switch (resultsType) {
+ case "INT_ARRAY": {
+ int[] rowValue = new int[]{Integer.parseInt(defaultValSql)};
+ expectedRow = new Object[]{rowValue};
+ break;
+ }
+ case "LONG_ARRAY": {
+ long[] rowValue = new long[]{Long.parseLong(defaultValSql)};
+ expectedRow = new Object[]{rowValue};
+ break;
+ }
+ case "FLOAT_ARRAY": {
+ float[] rowValue = new float[]{Float.parseFloat(defaultValSql)};
+ expectedRow = new Object[]{rowValue};
+ break;
+ }
+ case "DOUBLE_ARRAY": {
+ double[] rowValue = new double[]{Double.parseDouble(defaultValSql)};
+ expectedRow = new Object[]{rowValue};
+ break;
+ }
+ case "STRING_ARRAY": {
+ // defaultValSql is quoted, need to remove quotes
+ String str = defaultValSql.substring(1, defaultValSql.length() - 1);
+ String[] rowValue = new String[]{str};
+ expectedRow = new Object[]{rowValue};
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException("Not support data type - " +
resultsType);
+ }
+
+ FluentQueryTest.withBaseDir(_baseDir)
+ .withNullHandling(false)
+ .givenTable(schema, tableConfig)
+ .onFirstInstance(new Object[]{"{\"name\": [null]}"})
+ .whenQuery("SELECT jsonExtractScalar(json, '$.name', '" + resultsType
+ "', " + defaultValSql + ") "
+ + "FROM testTable")
+ // TODO: Change the framework so it does not duplicate segments when only
one segment is used
+ .thenResultIs(expectedRow, expectedRow); // 2 rows because of segment
duplication
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]