This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 852305a4cd0 Make Scalar Functions Advertise Determinism and Support
RAND function with Runtime-Only and Seeded Mode (#17208)
852305a4cd0 is described below
commit 852305a4cd09cf6d73929d53be398368842fa1e1
Author: Xiang Fu <[email protected]>
AuthorDate: Mon Nov 17 17:37:41 2025 -0800
Make Scalar Functions Advertise Determinism and Support RAND function with
Runtime-Only and Seeded Mode (#17208)
* Add seeded rand support
* Track scalar function determinism
---
.../apache/pinot/common/function/FunctionInfo.java | 14 +++++-
.../pinot/common/function/FunctionRegistry.java | 14 +++++-
.../function/scalar/ArithmeticFunctions.java | 31 +++++++++++++
.../rewriter/CompileTimeFunctionsInvoker.java | 2 +-
.../pinot/sql/parsers/CalciteSqlCompilerTest.java | 11 +++++
.../ScalarTransformFunctionWrapperTest.java | 52 ++++++++++++++++++++++
.../rel/rules/PinotEvaluateLiteralRule.java | 2 +-
.../pinot/query/QueryPlannerRuleOptionsTest.java | 7 +++
.../pinot/spi/annotations/ScalarFunction.java | 5 +++
9 files changed, 132 insertions(+), 6 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
index 6675417739e..16cd3997bb5 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionInfo.java
@@ -26,11 +26,17 @@ public class FunctionInfo {
private final Method _method;
private final Class<?> _clazz;
private final boolean _nullableParameters;
+ private final boolean _deterministic;
public FunctionInfo(Method method, Class<?> clazz, boolean
nullableParameters) {
+ this(method, clazz, nullableParameters, true);
+ }
+
+ public FunctionInfo(Method method, Class<?> clazz, boolean
nullableParameters, boolean deterministic) {
_method = method;
_clazz = clazz;
_nullableParameters = nullableParameters;
+ _deterministic = deterministic;
}
public Method getMethod() {
@@ -45,10 +51,14 @@ public class FunctionInfo {
return _nullableParameters;
}
+ public boolean isDeterministic() {
+ return _deterministic;
+ }
+
public static FunctionInfo fromMethod(Method method) {
ScalarFunction annotation = method.getAnnotation(ScalarFunction.class);
boolean nullableParameters = annotation != null &&
annotation.nullableParameters();
-
- return new FunctionInfo(method, method.getDeclaringClass(),
nullableParameters);
+ boolean deterministic = annotation == null || annotation.isDeterministic();
+ return new FunctionInfo(method, method.getDeclaringClass(),
nullableParameters, deterministic);
}
}
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
index 75f5dc1eb8d..395c49e8ca8 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java
@@ -129,7 +129,8 @@ public class FunctionRegistry {
ScalarFunction scalarFunction =
method.getAnnotation(ScalarFunction.class);
if (scalarFunction.enabled()) {
FunctionInfo functionInfo =
- new FunctionInfo(method, method.getDeclaringClass(),
scalarFunction.nullableParameters());
+ new FunctionInfo(method, method.getDeclaringClass(),
scalarFunction.nullableParameters(),
+ scalarFunction.isDeterministic());
int numArguments = scalarFunction.isVarArg() ? VAR_ARG_KEY :
method.getParameterCount();
String[] names = scalarFunction.names();
if (names.length == 0) {
@@ -279,7 +280,7 @@ public class FunctionRegistry {
@Override
public PinotSqlFunction toPinotSqlFunction() {
- return new PinotSqlFunction(_mainName, getReturnTypeInference(),
getOperandTypeChecker());
+ return new PinotSqlFunction(_mainName, getReturnTypeInference(),
getOperandTypeChecker(), isDeterministic());
}
private SqlReturnTypeInference getReturnTypeInference() {
@@ -349,6 +350,15 @@ public class FunctionRegistry {
return functionInfo != null ? functionInfo :
_functionInfoMap.get(VAR_ARG_KEY);
}
+ private boolean isDeterministic() {
+ for (FunctionInfo functionInfo : _functionInfoMap.values()) {
+ if (!functionInfo.isDeterministic()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
@Override
public String getScalarFunctionId() {
if (_functionInfoMap.size() == 1) {
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
index 6412c9c22f2..70ff0dca5f6 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java
@@ -20,6 +20,7 @@ package org.apache.pinot.common.function.scalar;
import java.math.BigDecimal;
import java.math.RoundingMode;
+import java.util.concurrent.ThreadLocalRandom;
import org.apache.pinot.spi.annotations.ScalarFunction;
@@ -27,6 +28,8 @@ import org.apache.pinot.spi.annotations.ScalarFunction;
* Arithmetic scalar functions.
*/
public class ArithmeticFunctions {
+ private static final double DOUBLE_UNIT = 0x1.0p-53;
+
private ArithmeticFunctions() {
}
@@ -201,6 +204,34 @@ public class ArithmeticFunctions {
return Math.signum(a) * Math.floor(Math.abs(a));
}
+ @ScalarFunction(isDeterministic = false)
+ public static double rand() {
+ return ThreadLocalRandom.current().nextDouble();
+ }
+
+ @ScalarFunction
+ public static double rand(long seed) {
+ return deterministicRand(seed);
+ }
+
+ private static double deterministicRand(long seed) {
+ // The XOR with 0x5DEECE66DL is inspired by java.util.Random's initial LCG multiplier scramble,
+ // but unlike Random, we use a custom non-linear mix64 routine for diffusion and reproducible output.
+ // This approach is chosen to provide deterministic, well-diffused pseudo-random values, not to match Random's
+ // output.
+ // Reference for multiplier: https://docs.oracle.com/javase/8/docs/api/java/util/Random.html
+ long mixed = mix64(seed ^ 0x5DEECE66DL);
+ // Right-shift by 11 bits to extract the top 53 bits for use as the mantissa of an IEEE 754 double-precision value.
+ // This produces a double in [0,1) with full precision.
+ return (mixed >>> 11) * DOUBLE_UNIT;
+ }
+
+ private static long mix64(long z) {
+ z = (z ^ (z >>> 33)) * 0xff51afd7ed558ccdL;
+ z = (z ^ (z >>> 33)) * 0xc4ceb9fe1a85ec53L;
+ return z ^ (z >>> 33);
+ }
+
@ScalarFunction
public static long gcd(long a, long b) {
return a == 0 ? Math.abs(b) : gcd(b % a, a);
diff --git
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
index 02bdf4b0961..9d7e9fbd50f 100644
---
a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
+++
b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/CompileTimeFunctionsInvoker.java
@@ -88,7 +88,7 @@ public class CompileTimeFunctionsInvoker implements
QueryRewriter {
}
String canonicalName =
FunctionRegistry.canonicalize(function.getOperator());
FunctionInfo functionInfo =
FunctionRegistry.lookupFunctionInfo(canonicalName, argumentTypes);
- if (functionInfo == null) {
+ if (functionInfo == null || !functionInfo.isDeterministic()) {
return expression;
}
try {
diff --git
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index b547e97ae68..f3a2bf5d6bb 100644
---
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++
b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -2142,6 +2142,17 @@ public class CalciteSqlCompilerTest {
Assert.assertTrue(nowTs >= lowerBound);
Assert.assertTrue(nowTs <= upperBound);
+ query = "SELECT rand() FROM foo";
+ pinotQuery = compileToPinotQuery(query);
+ Expression randExpression = pinotQuery.getSelectList().get(0);
+ Assert.assertTrue(randExpression.isSetFunctionCall());
+ Assert.assertEquals(randExpression.getFunctionCall().getOperator(),
"rand");
+
Assert.assertTrue(randExpression.getFunctionCall().getOperands().isEmpty());
+
+ query = "SELECT rand(123) FROM foo";
+ pinotQuery = compileToPinotQuery(query);
+ Assert.assertTrue(pinotQuery.getSelectList().get(0).isSetLiteral());
+
query = "select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
+
"decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from
mytable";
pinotQuery = compileToPinotQuery(query);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
index 13a38bd7418..20f0979649b 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
@@ -28,6 +28,7 @@ import java.util.Random;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.common.function.scalar.ArithmeticFunctions;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.RequestContextUtils;
import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -1125,6 +1126,57 @@ public class ScalarTransformFunctionWrapperTest extends
BaseTransformFunctionTes
testTransformFunction(transformFunction, _bigDecimalSVValues);
}
+ @Test
+ public void testRandTransformFunction() {
+ ExpressionContext expression = RequestContextUtils.getExpression("rand()");
+ TransformFunction transformFunction =
TransformFunctionFactory.get(expression, _dataSourceMap);
+ assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+ assertEquals(transformFunction.getName(), "rand");
+ double[] firstValues =
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ for (double value : firstValues) {
+ assertTrue(value >= 0d && value < 1d, "rand() should return values in
[0, 1)");
+ }
+ double[] secondValues =
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ assertFalse(Arrays.equals(firstValues, secondValues),
+ "rand() should yield non-deterministic results across evaluations");
+ }
+
+ @Test
+ public void testRandWithSeedTransformFunction() {
+ ExpressionContext expression =
RequestContextUtils.getExpression("rand(42)");
+ TransformFunction transformFunction =
TransformFunctionFactory.get(expression, _dataSourceMap);
+ assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+ assertEquals(transformFunction.getName(), "rand");
+ double[] firstValues =
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ double[] secondValues =
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ assertTrue(Arrays.equals(firstValues, secondValues), "rand(seed) should be
deterministic");
+ double literalExpected = ArithmeticFunctions.rand(42L);
+ for (double value : firstValues) {
+ assertTrue(value >= 0d && value < 1d, "rand(seed) should return values
in [0, 1)");
+ assertEquals(value, literalExpected);
+ }
+
+ expression = RequestContextUtils.getExpression(String.format("rand(%s)",
INT_SV_COLUMN));
+ transformFunction = TransformFunctionFactory.get(expression,
_dataSourceMap);
+ assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
+ double[] columnSeedValues =
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ double[] columnSeedValuesSecond =
+
Arrays.copyOf(transformFunction.transformToDoubleValuesSV(_projectionBlock),
NUM_ROWS);
+ assertTrue(Arrays.equals(columnSeedValues, columnSeedValuesSecond),
+ "rand(seedColumn) should be deterministic per seed");
+ boolean hasVariance = false;
+ for (int i = 1; i < NUM_ROWS; i++) {
+ if (Double.compare(columnSeedValues[i], columnSeedValues[0]) != 0) {
+ hasVariance = true;
+ break;
+ }
+ }
+ assertTrue(hasVariance, "rand(seedColumn) should produce varied outputs
when seeds differ");
+ for (double value : columnSeedValues) {
+ assertTrue(value >= 0d && value < 1d, "rand(seedColumn) should return
values in [0, 1)");
+ }
+ }
+
@Test
public void testStringLowerTransformFunctionNullLiteral() {
ExpressionContext expression =
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
index fea69ab5d21..2e2b6c57d66 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotEvaluateLiteralRule.java
@@ -187,7 +187,7 @@ public class PinotEvaluateLiteralRule {
}
String canonicalName =
FunctionRegistry.canonicalize(PinotRuleUtils.extractFunctionName(rexCall));
FunctionInfo functionInfo =
FunctionRegistry.lookupFunctionInfo(canonicalName, argumentTypes);
- if (functionInfo == null) {
+ if (functionInfo == null || !functionInfo.isDeterministic()) {
// Function cannot be evaluated
return rexCall;
}
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
index 0bcf5e11838..98d1885223d 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryPlannerRuleOptionsTest.java
@@ -217,6 +217,13 @@ public class QueryPlannerRuleOptionsTest extends
QueryEnvironmentTestBase {
//@formatter:on
}
+ @Test
+ public void testRandFunctionNotEvaluatedInMultiStagePlanner() {
+ String query = "EXPLAIN PLAN FOR SELECT rand() FROM b";
+ String explain = _queryEnvironment.explainQuery(query,
RANDOM_REQUEST_ID_GEN.nextLong());
+ assertTrue(explain.contains("RAND()"), "Expected RAND() to remain in
logical plan");
+ }
+
@Test
public void testDisablePinotProjectJoinTransposeRule() {
// Test the knob of turning off PinotProjectJoinTransposeRule
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
index e80b6d0c3b4..f78b364ee57 100644
---
a/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/annotations/ScalarFunction.java
@@ -62,5 +62,10 @@ public @interface ScalarFunction {
*/
boolean isVarArg() default false;
+ /**
+ * Whether the scalar function should be treated as deterministic (eligible for compile-time evaluation).
+ */
+ boolean isDeterministic() default true;
+
@Deprecated boolean isPlaceholder() default false;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]