This is an automated email from the ASF dual-hosted git repository.

gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new f2153a1382b Handle null values in JsonExtractScalarTransformFunction 
with default value support (#16683)
f2153a1382b is described below

commit f2153a1382b52b6c3f4d479e72cc7607934736d0
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Tue Aug 26 13:42:20 2025 +0200

    Handle null values in JsonExtractScalarTransformFunction with default value 
support (#16683)
---
 .../JsonExtractScalarTransformFunction.java        |  60 ++++++++-
 .../JsonExtractScalarTransformFunctionTest.java    | 136 +++++++++++++++++++++
 2 files changed, 191 insertions(+), 5 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
index 783ec023227..f171d1aed2a 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunction.java
@@ -361,7 +361,17 @@ public class JsonExtractScalarTransformFunction extends 
BaseTransformFunction {
       int numValues = result.size();
       int[] values = new int[numValues];
       for (int j = 0; j < numValues; j++) {
-        values[j] = result.get(j);
+        Integer value = result.get(j);
+        if (value == null) {
+          if (_defaultValue != null) {
+            value = ((Number) _defaultValue).intValue();
+          } else {
+            throw new IllegalArgumentException(
+                "At least one of the resolved JSON arrays include nulls, which 
is not supported in Pinot. "
+                    + "Consider setting a default value as the fourth argument 
of json_extract_scalar.");
+          }
+        }
+        values[j] = value;
       }
       _intValuesMV[i] = values;
     }
@@ -386,7 +396,17 @@ public class JsonExtractScalarTransformFunction extends 
BaseTransformFunction {
       int numValues = result.size();
       long[] values = new long[numValues];
       for (int j = 0; j < numValues; j++) {
-        values[j] = result.get(j);
+        Long value = result.get(j);
+        if (value == null) {
+          if (_defaultValue != null) {
+            value = ((Number) _defaultValue).longValue();
+          } else {
+            throw new IllegalArgumentException(
+                "At least one of the resolved JSON arrays include nulls, which 
is not supported in Pinot. "
+                    + "Consider setting a default value as the fourth argument 
of json_extract_scalar.");
+          }
+        }
+        values[j] = value;
       }
       _longValuesMV[i] = values;
     }
@@ -411,7 +431,17 @@ public class JsonExtractScalarTransformFunction extends 
BaseTransformFunction {
       int numValues = result.size();
       float[] values = new float[numValues];
       for (int j = 0; j < numValues; j++) {
-        values[j] = result.get(j);
+        Float value = result.get(j);
+        if (value == null) {
+          if (_defaultValue != null) {
+            value = ((Number) _defaultValue).floatValue();
+          } else {
+            throw new IllegalArgumentException(
+                "At least one of the resolved JSON arrays include nulls, which 
is not supported in Pinot. "
+                    + "Consider setting a default value as the fourth argument 
of json_extract_scalar.");
+          }
+        }
+        values[j] = value;
       }
       _floatValuesMV[i] = values;
     }
@@ -436,7 +466,17 @@ public class JsonExtractScalarTransformFunction extends 
BaseTransformFunction {
       int numValues = result.size();
       double[] values = new double[numValues];
       for (int j = 0; j < numValues; j++) {
-        values[j] = result.get(j);
+        Double value = result.get(j);
+        if (value == null) {
+          if (_defaultValue != null) {
+            value = ((Number) _defaultValue).doubleValue();
+          } else {
+            throw new IllegalArgumentException(
+                "At least one of the resolved JSON arrays include nulls, which 
is not supported in Pinot. "
+                    + "Consider setting a default value as the fourth argument 
of json_extract_scalar.");
+          }
+        }
+        values[j] = value;
       }
       _doubleValuesMV[i] = values;
     }
@@ -461,7 +501,17 @@ public class JsonExtractScalarTransformFunction extends 
BaseTransformFunction {
       int numValues = result.size();
       String[] values = new String[numValues];
       for (int j = 0; j < numValues; j++) {
-        values[j] = result.get(j);
+        String value = result.get(j);
+        if (value == null) {
+          if (_defaultValue != null) {
+            value = _defaultValue.toString();
+          } else {
+            throw new IllegalArgumentException(
+                "At least one of the resolved JSON arrays include nulls, which 
is not supported in Pinot. "
+                    + "Consider setting a default value as the fourth argument 
of json_extract_scalar.");
+          }
+        }
+        values[j] = value;
       }
       _stringValuesMV[i] = values;
     }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
index c65a4295fe5..0eaaf52b259 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractScalarTransformFunctionTest.java
@@ -18,26 +18,40 @@
  */
 package org.apache.pinot.core.operator.transform.function;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.math.BigDecimal;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Random;
+import org.apache.commons.io.FileUtils;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.exception.BadQueryRequestException;
 import org.apache.pinot.spi.utils.JsonUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
+import org.assertj.core.api.Assertions;
 import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 
 public class JsonExtractScalarTransformFunctionTest extends 
BaseTransformFunctionTest {
 
+  protected File _baseDir;
+
   @Test(dataProvider = "testJsonPathTransformFunction")
   public void testJsonPathTransformFunction(String expressionStr, DataType 
resultsDataType, boolean isSingleValue) {
     ExpressionContext expression = 
RequestContextUtils.getExpression(expressionStr);
@@ -517,4 +531,126 @@ public class JsonExtractScalarTransformFunctionTest 
extends BaseTransformFunctio
         new Object[]{String.format("json_extract_key(%s, \"$.*\")", 
JSON_COLUMN)}};
     //@formatter:on
   }
+
+  @BeforeClass
+  void createBaseDir() {
+    try {
+      _baseDir = 
Files.createTempDirectory(getClass().getSimpleName()).toFile();
+    } catch (IOException ex) {
+      throw new UncheckedIOException(ex);
+    }
+  }
+
+  @AfterClass
+  void destroyBaseDir()
+      throws IOException {
+    if (_baseDir != null) {
+      FileUtils.deleteDirectory(_baseDir);
+    }
+  }
+
+  @DataProvider
+  public static Object[][] multiValuesAndDefaults() {
+    return new Object[][]{
+        {"INT_ARRAY", "-123"},
+        {"LONG_ARRAY", "-123"},
+        {"FLOAT_ARRAY", "0.5"},
+        {"DOUBLE_ARRAY", "0.5"},
+        {"STRING_ARRAY", "'default'"}
+    };
+  }
+
+  /**
+   *
+   * @param resultsType The result type used in the query, one of 'INT_ARRAY', 
'LONG_ARRAY', 'FLOAT_ARRAY', 'DOUBLE_ARRAY' or 'STRING_ARRAY'
+   * @param notUsed Not used, just here to be able to use the same provider as
+   *                {@link #mvWithNullsWithDefault(String, String)}
+   */
+  @Test(dataProvider = "multiValuesAndDefaults")
+  public void mvWithNullsWithoutDefault(String resultsType, String notUsed) {
+    // schema with a single column called "json" of type JSON
+    Schema schema = new Schema.SchemaBuilder()
+        .setSchemaName("testTable")
+        .setEnableColumnBasedNullHandling(true)
+        .addDimensionField("json", DataType.JSON)
+        .build();
+    // trivial table config
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .build();
+    try {
+      FluentQueryTest.withBaseDir(_baseDir)
+          .withNullHandling(false)
+          .givenTable(schema, tableConfig)
+          .onFirstInstance(new Object[]{"{\"name\": [null]}"})
+          .whenQuery("SELECT jsonExtractScalar(json, '$.name', '" + 
resultsType + "') FROM testTable")
+          // TODO: Change the framework to not duplicate segments when only 
one segment is used
+          .thenResultIs(new Object[]{"doesn't matter, it should fail"});
+    } catch (AssertionError e) {
+      Assertions.assertThat(e.getMessage())
+          .describedAs("Expected to fail because the JSON array contains 
nulls")
+          .contains("At least one of the resolved JSON arrays include nulls");
+    }
+  }
+
+  /**
+   *
+   * @param resultsType The result type used in the query, one of 'INT_ARRAY', 
'LONG_ARRAY', 'FLOAT_ARRAY', 'DOUBLE_ARRAY' or 'STRING_ARRAY'
+   * @param defaultValSql The default value to use in the query, passed as the 
fourth argument of jsonExtractScalar
+   */
+  @Test(dataProvider = "multiValuesAndDefaults")
+  public void mvWithNullsWithDefault(String resultsType, String defaultValSql) 
{
+    // schema with a single column called "json" of type JSON
+    Schema schema = new Schema.SchemaBuilder()
+        .setSchemaName("testTable")
+        .setEnableColumnBasedNullHandling(true)
+        .addDimensionField("json", DataType.JSON)
+        .build();
+    // trivial table config
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .build();
+
+    Object[] expectedRow;
+    switch (resultsType) {
+      case "INT_ARRAY": {
+        int[] rowValue = new int[]{Integer.parseInt(defaultValSql)};
+        expectedRow = new Object[]{rowValue};
+        break;
+      }
+      case "LONG_ARRAY": {
+        long[] rowValue = new long[]{Long.parseLong(defaultValSql)};
+        expectedRow = new Object[]{rowValue};
+        break;
+      }
+      case "FLOAT_ARRAY": {
+        float[] rowValue = new float[]{Float.parseFloat(defaultValSql)};
+        expectedRow = new Object[]{rowValue};
+        break;
+      }
+      case "DOUBLE_ARRAY": {
+        double[] rowValue = new double[]{Double.parseDouble(defaultValSql)};
+        expectedRow = new Object[]{rowValue};
+        break;
+      }
+      case "STRING_ARRAY": {
+        // defaultValSql is quoted, need to remove quotes
+        String str = defaultValSql.substring(1, defaultValSql.length() - 1);
+        String[] rowValue = new String[]{str};
+        expectedRow = new Object[]{rowValue};
+        break;
+      }
+      default:
+        throw new UnsupportedOperationException("Not support data type - " + 
resultsType);
+    }
+
+    FluentQueryTest.withBaseDir(_baseDir)
+        .withNullHandling(false)
+        .givenTable(schema, tableConfig)
+        .onFirstInstance(new Object[]{"{\"name\": [null]}"})
+        .whenQuery("SELECT jsonExtractScalar(json, '$.name', '" + resultsType 
+ "', " + defaultValSql + ") "
+            + "FROM testTable")
+        // TODO: Change the framework to not duplicate segments when only 
one segment is used
+        .thenResultIs(expectedRow, expectedRow); // 2 rows because of segment 
duplication
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to