This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 54d492c59ae branch-4.1: [Enhancement](mmhash) Support mmhash3_u64_v2
#61846 (#61925)
54d492c59ae is described below
commit 54d492c59ae88c167e17fc57c37c52605181ed9e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 31 18:27:29 2026 +0800
branch-4.1: [Enhancement](mmhash) Support mmhash3_u64_v2 #61846 (#61925)
Cherry-picked from #61846
Co-authored-by: linrrarity <[email protected]>
---
be/src/exprs/function/function_hash.cpp | 9 ++-
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../functions/scalar/MurmurHash3U64V2.java | 75 ++++++++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 ++
.../hash_functions/test_hash_function.out | 71 ++++++++++++++++++++
.../hash_functions/test_hash_function.groovy | 67 +++++++++++++++++++
6 files changed, 227 insertions(+), 2 deletions(-)
diff --git a/be/src/exprs/function/function_hash.cpp
b/be/src/exprs/function/function_hash.cpp
index 19145a80ba3..f07819c1f86 100644
--- a/be/src/exprs/function/function_hash.cpp
+++ b/be/src/exprs/function/function_hash.cpp
@@ -46,6 +46,8 @@ struct MurmurHash3Impl {
static constexpr auto get_name() {
if constexpr (ReturnType == TYPE_INT) {
return "murmur_hash3_32";
+ } else if constexpr (ReturnType == TYPE_LARGEINT) {
+ return "murmur_hash3_u64_v2";
} else if constexpr (is_mmh64_v2) {
return "murmur_hash3_64_v2";
} else {
@@ -98,7 +100,7 @@ struct MurmurHash3Impl {
} else {
col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
reinterpret_cast<const
char*>(&data[current_offset]),
- offsets[i] - current_offset, col_to_data[i]);
+ offsets[i] - current_offset,
static_cast<uint64_t>(col_to_data[i]));
}
current_offset = offsets[i];
}
@@ -111,7 +113,7 @@ struct MurmurHash3Impl {
HashUtil::murmur_hash3_32(value.data(),
value.size(), col_to_data[i]);
} else {
col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
- value.data(), value.size(), col_to_data[i]);
+ value.data(), value.size(),
static_cast<uint64_t>(col_to_data[i]));
}
}
} else {
@@ -129,6 +131,8 @@ using FunctionMurmurHash3_64 =
FunctionVariadicArgumentsBase<DataTypeInt64,
MurmurHash3Impl<TYPE_BIGINT>>;
using FunctionMurmurHash3_64_V2 =
FunctionVariadicArgumentsBase<DataTypeInt64,
MurmurHash3Impl<TYPE_BIGINT, true>>;
+using FunctionMurmurHash3U64V2 =
+ FunctionVariadicArgumentsBase<DataTypeInt128,
MurmurHash3Impl<TYPE_LARGEINT, true>>;
#ifdef BE_TEST
const char* murmur_hash3_get_name_type_int_for_test() {
@@ -230,6 +234,7 @@ void register_function_hash(SimpleFunctionFactory& factory)
{
factory.register_function<FunctionMurmurHash3_32>();
factory.register_function<FunctionMurmurHash3_64>();
factory.register_function<FunctionMurmurHash3_64_V2>();
+ factory.register_function<FunctionMurmurHash3U64V2>();
factory.register_function<FunctionXxHash_32>();
factory.register_function<FunctionXxHash_64>();
factory.register_alias("xxhash_64", "xxhash3_64");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index bb6bb4977e0..c9ba5ccc61a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -372,6 +372,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAl
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative;
import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay;
import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch;
@@ -938,6 +939,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(MurmurHash332.class, "murmur_hash3_32"),
scalar(MurmurHash364.class, "murmur_hash3_64"),
scalar(MurmurHash364V2.class, "murmur_hash3_64_v2"),
+ scalar(MurmurHash3U64V2.class, "murmur_hash3_u64_v2"),
scalar(Negative.class, "negative"),
scalar(NextDay.class, "next_day"),
scalar(NonNullable.class, "non_nullable"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java
new file mode 100644
index 00000000000..9ef87a50820
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.LargeIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'murmur_hash3_u64_v2': one or more VARCHAR/STRING arguments, LARGEINT result.
+ */
+public class MurmurHash3U64V2 extends ScalarFunction
+ implements ExplicitlyCastableSignature, PropagateNullable {
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(StringType.INSTANCE)
+ );
+
+ /**
+ * Constructor taking one required argument followed by any number of extra arguments.
+ */
+ public MurmurHash3U64V2(Expression arg, Expression... varArgs) {
+ super("murmur_hash3_u64_v2", ExpressionUtils.mergeArguments(arg,
varArgs));
+ }
+
+ /** Constructor used by withChildren; takes ready-made function params to reuse the signature. */
+ private MurmurHash3U64V2(ScalarFunctionParams functionParams) {
+ super(functionParams);
+ }
+
+ /**
+ * Returns a new instance over the given children; the child list must not be empty.
+ */
+ @Override
+ public MurmurHash3U64V2 withChildren(List<Expression> children) {
+ Preconditions.checkArgument(!children.isEmpty());
+ return new MurmurHash3U64V2(getFunctionParams(children));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitMurmurHash3U64V2(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index a3cca3e5585..520e6193f3f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -394,6 +394,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAl
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative;
import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay;
import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch;
@@ -1989,6 +1990,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(murmurHash364V2, context);
}
+ default R visitMurmurHash3U64V2(MurmurHash3U64V2 murmurHash3U64V2, C
context) {
+ return visitScalarFunction(murmurHash3U64V2, context);
+ }
+
default R visitXxHash32(XxHash32 xxHash32, C context) {
return visitScalarFunction(xxHash32, context);
}
diff --git
a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
index e4d755e029e..764d533fcc7 100644
---
a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
+++
b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
@@ -29,6 +29,76 @@
-- !mmh3_64_v2_4 --
3669213779466221743
+-- !mmh3_64_v2_5 --
+-2648103510258542450
+
+-- !mmh3_64_v2_6 --
+-5640908359072688302
+
+-- !mmh3_u64_v2_1 --
+\N
+
+-- !mmh3_u64_v2_2 --
+4038800892574899471
+
+-- !mmh3_u64_v2_3 --
+5998619086395760910
+
+-- !mmh3_u64_v2_4 --
+3669213779466221743
+
+-- !mmh3_u64_v2_5 --
+15798640563451009166
+
+-- !mmh3_u64_v2_6 --
+12805835714636863314
+
+-- !mmh3_u64_v2_7 --
+0
+
+-- !mmh3_u64_v2_8 --
+9607679276477937801
+
+-- !mmh3_u64_v2_9 --
+17783800982478351481
+
+-- !mmh3_u64_v2_10 --
+10490885898849282672
+
+-- !mmh3_64_v2_table --
+1 4038800892574899471
+2 5998619086395760910
+3 \N
+4 0
+5 3669213779466221743
+6 -2648103510258542450
+7 -5640908359072688302
+8 5163374697039953916
+9 -1516026088323099476
+
+-- !mmh3_u64_v2_table --
+1 4038800892574899471
+2 5998619086395760910
+3 \N
+4 0
+5 3669213779466221743
+6 15798640563451009166
+7 12805835714636863314
+8 5163374697039953916
+9 16930717985386452140
+
+-- !mmh3_64_v2_fold_1 --
+-6017608668500074082
+
+-- !mmh3_64_v2_fold_2 --
+-4107623306750946434
+
+-- !mmh3_u64_v2_fold_1 --
+12429135405209477534
+
+-- !mmh3_u64_v2_fold_2 --
+14339120766958605182
+
-- !sql --
\N
@@ -46,3 +116,4 @@
-- !sql --
7001965798170371843
+
diff --git
a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
index 74acd20a998..ace3379c99e 100644
---
a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
@@ -32,6 +32,73 @@ suite("test_hash_function", "arrow_flight_sql") {
qt_mmh3_64_v2_2 "SELECT MURMUR_HASH3_64_V2('1000209601_1756808272');"
qt_mmh3_64_v2_3 "SELECT MURMUR_HASH3_64_V2('hello world');"
qt_mmh3_64_v2_4 "SELECT MURMUR_HASH3_64_V2('apache doris');"
+ qt_mmh3_64_v2_5 "SELECT MURMUR_HASH3_64_V2('1013199993_1756808272');"
+ qt_mmh3_64_v2_6 "SELECT MURMUR_HASH3_64_V2('1020273884_1756808272');"
+
+ // murmur_hash3_u64_v2 tests
+ qt_mmh3_u64_v2_1 "SELECT MURMUR_HASH3_U64_V2(NULL);"
+ qt_mmh3_u64_v2_2 "SELECT MURMUR_HASH3_U64_V2('1000209601_1756808272');"
+ qt_mmh3_u64_v2_3 "SELECT MURMUR_HASH3_U64_V2('hello world');"
+ qt_mmh3_u64_v2_4 "SELECT MURMUR_HASH3_U64_V2('apache doris');"
+ qt_mmh3_u64_v2_5 "SELECT MURMUR_HASH3_U64_V2('1013199993_1756808272');"
+ qt_mmh3_u64_v2_6 "SELECT MURMUR_HASH3_U64_V2('1020273884_1756808272');"
+ qt_mmh3_u64_v2_7 "SELECT MURMUR_HASH3_U64_V2('');"
+ qt_mmh3_u64_v2_8 "SELECT MURMUR_HASH3_U64_V2('a');"
+ qt_mmh3_u64_v2_9 "SELECT MURMUR_HASH3_U64_V2('hello', 'world');"
+ qt_mmh3_u64_v2_10 "SELECT MURMUR_HASH3_U64_V2('hello', 'world', '!');"
+
+ // Validation: murmur_hash3_u64_v2 should equal (murmur_hash3_64_v2 &
2^64-1)
+ def validate_mmh3_u64_v2 = { String... args ->
+ def argList = args.collect { "'${it}'" }.join(', ')
+ def u64_res = sql "SELECT MURMUR_HASH3_U64_V2(${argList});"
+ def v2_masked = sql "SELECT CAST(MURMUR_HASH3_64_V2(${argList}) AS
LARGEINT) & 18446744073709551615;"
+ assertEquals(u64_res, v2_masked);
+ }
+
+ validate_mmh3_u64_v2('1000209601_1756808272');
+ validate_mmh3_u64_v2('hello world');
+ validate_mmh3_u64_v2('apache doris');
+ validate_mmh3_u64_v2('1013199993_1756808272');
+ validate_mmh3_u64_v2('1020273884_1756808272');
+ validate_mmh3_u64_v2('');
+ validate_mmh3_u64_v2('a');
+ validate_mmh3_u64_v2('你好🤣');
+ validate_mmh3_u64_v2('アパッチドリス');
+
+ // Table-based tests for mmh3_64_v2 and mmh3_u64_v2
+ sql "DROP TABLE IF EXISTS test_hash_tbl;"
+ sql """
+ CREATE TABLE test_hash_tbl (
+ id INT,
+ str_col VARCHAR(100)
+ ) DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO test_hash_tbl VALUES
+ (1, '1000209601_1756808272'),
+ (2, 'hello world'),
+ (3, NULL),
+ (4, ''),
+ (5, 'apache doris'),
+ (6, '1013199993_1756808272'),
+ (7, '1020273884_1756808272'),
+ (8, '你好🤣'),
+ (9, 'アパッチドリス');
+ """
+
+ qt_mmh3_64_v2_table "SELECT id, MURMUR_HASH3_64_V2(str_col) FROM
test_hash_tbl ORDER BY id;"
+ qt_mmh3_u64_v2_table "SELECT id, MURMUR_HASH3_U64_V2(str_col) FROM
test_hash_tbl ORDER BY id;"
+
+ sql "DROP TABLE IF EXISTS test_hash_tbl;"
+
+ // Constant folding tests
+ qt_mmh3_64_v2_fold_1 "SELECT MURMUR_HASH3_64_V2('test') + 1;"
+ qt_mmh3_64_v2_fold_2 "SELECT MURMUR_HASH3_64_V2('a', 'b') * 2;"
+ qt_mmh3_u64_v2_fold_1 "SELECT MURMUR_HASH3_U64_V2('test') + 1;"
+ qt_mmh3_u64_v2_fold_2 "SELECT MURMUR_HASH3_U64_V2('a', 'b') * 2;"
qt_sql "SELECT xxhash_32(null);"
qt_sql "SELECT xxhash_32(\"hello\");"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]