This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 852305a4cd0 Make Scalar Functions Advertise Determinism and Support RAND function with Runtime-Only and Seeded Mode (#17208)
852305a4cd0 is described below

commit 852305a4cd09cf6d73929d53be398368842fa1e1
Author: Xiang Fu <[email protected]>
AuthorDate: Mon Nov 17 17:37:41 2025 -0800

    Make Scalar Functions Advertise Determinism and Support RAND function with Runtime-Only and Seeded Mode (#17208)
    
    * Add seeded rand support
    
    * Track scalar function determinism
---
 .../apache/pinot/common/function/FunctionInfo.java | 14 +++++-
 .../pinot/common/function/FunctionRegistry.java    | 14 +++++-
 .../function/scalar/ArithmeticFunctions.java       | 31 +++++++++++++
 .../rewriter/CompileTimeFunctionsInvoker.java      |  2 +-
 .../pinot/sql/parsers/CalciteSqlCompilerTest.java  | 11 +++++
 .../ScalarTransformFunctionWrapperTest.java        | 52 ++++++++++++++++++++++
 .../rel/rules/PinotEvaluateLiteralRule.java        |  2 +-
 .../pinot/query/QueryPlannerRuleOptionsTest.java   |  7 +++
 .../pinot/spi/annotations/ScalarFunction.java      |  5 +++
 9 files changed, 132 insertions(+), 6 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java 
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
index 6675417739e..16cd3997bb5 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
@@ -26,11 +26,17 @@ public class FunctionInfo {
   private final Method _method;
   private final Class<?> _clazz;
   private final boolean _nullableParameters;
+  private final boolean _deterministic;
 
   public FunctionInfo(Method method, Class<?> clazz, boolean 
nullableParameters) {
+    this(method, clazz, nullableParameters, true);
+  }
+
+  public FunctionInfo(Method method, Class<?> clazz, boolean 
nullableParameters, boolean deterministic) {
     _method = method;
     _clazz = clazz;
     _nullableParameters = nullableParameters;
+    _deterministic = deterministic;
   }
 
   public Method getMethod() {
@@ -45,10 +51,14 @@ public class FunctionInfo {
     return _nullableParameters;
   }
 
+  public boolean isDeterministic() {
+    return _deterministic;
+  }
+
   public static FunctionInfo fromMethod(Method method) {
     ScalarFunction annotation = method.getAnnotation(ScalarFunction.class);
     boolean nullableParameters = annotation != null && 
annotation.nullableParameters();
-
-    return new FunctionInfo(method, method.getDeclaringClass(), 
nullableParameters);
+    boolean deterministic = annotation == null || annotation.isDeterministic();
+    return new FunctionInfo(method, method.getDeclaringClass(), 
nullableParameters, deterministic);
   }
 }
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
index 75f5dc1eb8d..395c49e8ca8 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
@@ -129,7 +129,8 @@ public class FunctionRegistry {
       ScalarFunction scalarFunction = 
method.getAnnotation(ScalarFunction.class);
       if (scalarFunction.enabled()) {
         FunctionInfo functionInfo =
-            new FunctionInfo(method, method.getDeclaringClass(), 
scalarFunction.nullableParameters());
+            new FunctionInfo(method, method.getDeclaringClass(), 
scalarFunction.nullableParameters(),
+                scalarFunction.isDeterministic());
         int numArguments = scalarFunction.isVarArg() ? VAR_ARG_KEY : 
method.getParameterCount();
         String[] names = scalarFunction.names();
         if (names.length == 0) {
@@ -279,7 +280,7 @@ public class FunctionRegistry {
 
     @Override
     public PinotSqlFunction toPinotSqlFunction() {
-      return new PinotSqlFunction(_mainName, getReturnTypeInference(), 
getOperandTypeChecker());
+      return new PinotSqlFunction(_mainName, getReturnTypeInference(), 
getOperandTypeChecker(), isDeterministic());
     }
 
     private SqlReturnTypeInference getReturnTypeInference() {
@@ -349,6 +350,15 @@ public class FunctionRegistry {
       return functionInfo != null ? functionInfo : 
_functionInfoMap.get(VAR_ARG_KEY);
     }
 
+    private boolean isDeterministic() {
+      for (FunctionInfo functionInfo : _functionInfoMap.values()) {
+        if (!functionInfo.isDeterministic()) {
+          return false;
+        }
+      }
+      return true;
+    }
+
     @Override
     public String getScalarFunctionId() {
       if (_functionInfoMap.size() == 1) {
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
index 6412c9c22f2..70ff0dca5f6 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
@@ -20,6 +20,7 @@ package org.apache.pinot.common.function.scalar;
 
 import java.math.BigDecimal;
 import java.math.RoundingMode;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.pinot.spi.annotations.ScalarFunction;
 
 
@@ -27,6 +28,8 @@ import org.apache.pinot.spi.annotations.ScalarFunction;
  * Arithmetic scalar functions.
  */
 public class ArithmeticFunctions {
+  private static final double DOUBLE_UNIT = 0x1.0p-53;
+
   private ArithmeticFunctions() {
   }
 
@@ -201,6 +204,34 @@ public class ArithmeticFunctions {
     return Math.signum(a) * Math.floor(Math.abs(a));
   }
 
+  @ScalarFunction(isDeterministic = false)
+  public static double rand() {
+    return ThreadLocalRandom.current().nextDouble();
+  }
+
+  @ScalarFunction
+  public static double rand(long seed) {
+    return deterministicRand(seed);
+  }
+
+  private static double deterministicRand(long seed) {
+    // The XOR with 0x5DEECE66DL is inspired by java.util.Random's initial LCG multiplier scramble,
+    // but unlike Random, we use a custom non-linear mix64 routine for diffusion and reproducible output.
+    // This approach is chosen to provide deterministic, well-diffused pseudo-random values, not to match Random's
+    // output.
+    // Reference for multiplier: https://docs.oracle.com/javase/8/docs/api/java/util/Random.html
+    long mixed = mix64(seed ^ 0x5DEECE66DL);
+    // Right-shift by 11 bits to extract the top 53 bits for use as the mantissa of an IEEE 754 double-precision value.
+    // This produces a double in [0,1) with full precision.
+    return (mixed >>> 11) * DOUBLE_UNIT;
+  }
+
+  private static long mix64(long z) {
+    z = (z ^ (z >>> 33)) * 0xff51afd7ed558ccdL;
+    z = (z ^ (z >>> 33)) * 0xc4ceb9fe1a85ec53L;
+    return z ^ (z >>> 33);
+  }
+
   @ScalarFunction
   public static long gcd(long a, long b) {
     return a == 0 ? Math.abs(b) : gcd(b % a, a);
diff --git 
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
 
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
index 02bdf4b0961..9d7e9fbd50f 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
@@ -88,7 +88,7 @@ public class CompileTimeFunctionsInvoker implements 
QueryRewriter {
     }
     String canonicalName = 
FunctionRegistry.canonicalize(function.getOperator());
     FunctionInfo functionInfo = 
FunctionRegistry.lookupFunctionInfo(canonicalName, argumentTypes);
-    if (functionInfo == null) {
+    if (functionInfo == null || !functionInfo.isDeterministic()) {
       return expression;
     }
     try {
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index b547e97ae68..f3a2bf5d6bb 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -2142,6 +2142,17 @@ public class CalciteSqlCompilerTest {
     Assert.assertTrue(nowTs >= lowerBound);
     Assert.assertTrue(nowTs <= upperBound);
 
+    query = "SELECT rand() FROM foo";
+    pinotQuery = compileToPinotQuery(query);
+    Expression randExpression = pinotQuery.getSelectList().get(0);
+    Assert.assertTrue(randExpression.isSetFunctionCall());
+    Assert.assertEquals(randExpression.getFunctionCall().getOperator(), 
"rand");
+    
Assert.assertTrue(randExpression.getFunctionCall().getOperands().isEmpty());
+
+    query = "SELECT rand(123) FROM foo";
+    pinotQuery = compileToPinotQuery(query);
+    Assert.assertTrue(pinotQuery.getSelectList().get(0).isSetLiteral());
+
     query = "select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
         + 
"decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from 
mytable";
     pinotQuery = compileToPinotQuery(query);
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
index 13a38bd7418..20f0979649b 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
@@ -28,6 +28,7 @@ import java.util.Random;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.common.function.scalar.ArithmeticFunctions;
 import org.apache.pinot.common.request.context.ExpressionContext;
 import org.apache.pinot.common.request.context.RequestContextUtils;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -1125,6 +1126,57 @@ public class ScalarTransformFunctionWrapperTest extends 
BaseTransformFunctionTes
     testTransformFunction(transformFunction, _bigDecimalSVValues);
   }
 
+  @Test
+  public void testRandTransformFunction() {
+    ExpressionContext expression = RequestContextUtils.getExpression("rand()");
+    TransformFunction transformFunction = 
TransformFunctionFactory.get(expression, _dataSourceMap);
+    assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+    assertEquals(transformFunction.getName(), "rand");
+    double[] firstValues = 
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    for (double value : firstValues) {
+      assertTrue(value >= 0d && value < 1d, "rand() should return values in 
[0, 1)");
+    }
+    double[] secondValues = 
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    assertFalse(Arrays.equals(firstValues, secondValues),
+        "rand() should yield non-deterministic results across evaluations");
+  }
+
+  @Test
+  public void testRandWithSeedTransformFunction() {
+    ExpressionContext expression = 
RequestContextUtils.getExpression("rand(42)");
+    TransformFunction transformFunction = 
TransformFunctionFactory.get(expression, _dataSourceMap);
+    assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+    assertEquals(transformFunction.getName(), "rand");
+    double[] firstValues = 
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    double[] secondValues = 
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    assertTrue(Arrays.equals(firstValues, secondValues), "rand(seed) should be 
deterministic");
+    double literalExpected = ArithmeticFunctions.rand(42L);
+    for (double value : firstValues) {
+      assertTrue(value >= 0d && value < 1d, "rand(seed) should return values 
in [0, 1)");
+      assertEquals(value, literalExpected);
+    }
+
+    expression = RequestContextUtils.getExpression(String.format("rand(%s)", 
INT_SV_COLUMN));
+    transformFunction = TransformFunctionFactory.get(expression, 
_dataSourceMap);
+    assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+    double[] columnSeedValues = 
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    double[] columnSeedValuesSecond =
+        
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock), 
NUM_ROWS);
+    assertTrue(Arrays.equals(columnSeedValues, columnSeedValuesSecond),
+        "rand(seedColumn) should be deterministic per seed");
+    boolean hasVariance = false;
+    for (int i = 1; i < NUM_ROWS; i++) {
+      if (Double.compare(columnSeedValues[i], columnSeedValues[0]) != 0) {
+        hasVariance = true;
+        break;
+      }
+    }
+    assertTrue(hasVariance, "rand(seedColumn) should produce varied outputs 
when seeds differ");
+    for (double value : columnSeedValues) {
+      assertTrue(value >= 0d && value < 1d, "rand(seedColumn) should return 
values in [0, 1)");
+    }
+  }
+
   @Test
   public void testStringLowerTransformFunctionNullLiteral() {
     ExpressionContext expression =
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
index fea69ab5d21..2e2b6c57d66 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
@@ -187,7 +187,7 @@ public class PinotEvaluateLiteralRule {
     }
     String canonicalName = 
FunctionRegistry.canonicalize(PinotRuleUtils.extractFunctionName(rexCall));
     FunctionInfo functionInfo = 
FunctionRegistry.lookupFunctionInfo(canonicalName, argumentTypes);
-    if (functionInfo == null) {
+    if (functionInfo == null || !functionInfo.isDeterministic()) {
       // Function cannot be evaluated
       return rexCall;
     }
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
index 0bcf5e11838..98d1885223d 100644
--- 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
@@ -217,6 +217,13 @@ public class QueryPlannerRuleOptionsTest extends 
QueryEnvironmentTestBase {
     //@formatter:on
   }
 
+  @Test
+  public void testRandFunctionNotEvaluatedInMultiStagePlanner() {
+    String query = "EXPLAIN PLAN FOR SELECT rand() FROM b";
+    String explain = _queryEnvironment.explainQuery(query, 
RANDOM_REQUEST_ID_GEN.nextLong());
+    assertTrue(explain.contains("RAND()"), "Expected RAND() to remain in 
logical plan");
+  }
+
   @Test
   public void testDisablePinotProjectJoinTransposeRule() {
     // Test the knob of turning off PinotProjectJoinTransposeRule
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
index e80b6d0c3b4..f78b364ee57 100644
--- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
+++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
@@ -62,5 +62,10 @@ public @interface ScalarFunction {
    */
   boolean isVarArg() default false;
 
+  /**
+   * Whether the scalar function should be treated as deterministic (eligible for compile-time evaluation).
+   */
+  boolean isDeterministic() default true;
+
   @Deprecated boolean isPlaceholder() default false;
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to