This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 54d492c59ae branch-4.1: [Enhancement](mmhash) Support mmhash3_u64_v2 #61846 (#61925)
54d492c59ae is described below

commit 54d492c59ae88c167e17fc57c37c52605181ed9e
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 31 18:27:29 2026 +0800

    branch-4.1: [Enhancement](mmhash) Support mmhash3_u64_v2 #61846 (#61925)
    
    Cherry-picked from #61846
    
    Co-authored-by: linrrarity <[email protected]>
---
 be/src/exprs/function/function_hash.cpp            |  9 ++-
 .../doris/catalog/BuiltinScalarFunctions.java      |  2 +
 .../functions/scalar/MurmurHash3U64V2.java         | 75 ++++++++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |  5 ++
 .../hash_functions/test_hash_function.out          | 71 ++++++++++++++++++++
 .../hash_functions/test_hash_function.groovy       | 67 +++++++++++++++++++
 6 files changed, 227 insertions(+), 2 deletions(-)

diff --git a/be/src/exprs/function/function_hash.cpp b/be/src/exprs/function/function_hash.cpp
index 19145a80ba3..f07819c1f86 100644
--- a/be/src/exprs/function/function_hash.cpp
+++ b/be/src/exprs/function/function_hash.cpp
@@ -46,6 +46,8 @@ struct MurmurHash3Impl {
     static constexpr auto get_name() {
         if constexpr (ReturnType == TYPE_INT) {
             return "murmur_hash3_32";
+        } else if constexpr (ReturnType == TYPE_LARGEINT) {
+            return "murmur_hash3_u64_v2";
         } else if constexpr (is_mmh64_v2) {
             return "murmur_hash3_64_v2";
         } else {
@@ -98,7 +100,7 @@ struct MurmurHash3Impl {
                 } else {
                     col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
                             reinterpret_cast<const 
char*>(&data[current_offset]),
-                            offsets[i] - current_offset, col_to_data[i]);
+                            offsets[i] - current_offset, 
static_cast<uint64_t>(col_to_data[i]));
                 }
                 current_offset = offsets[i];
             }
@@ -111,7 +113,7 @@ struct MurmurHash3Impl {
                             HashUtil::murmur_hash3_32(value.data(), 
value.size(), col_to_data[i]);
                 } else {
                     col_to_data[i] = HashUtil::murmur_hash3_64<is_mmh64_v2>(
-                            value.data(), value.size(), col_to_data[i]);
+                            value.data(), value.size(), 
static_cast<uint64_t>(col_to_data[i]));
                 }
             }
         } else {
@@ -129,6 +131,8 @@ using FunctionMurmurHash3_64 =
         FunctionVariadicArgumentsBase<DataTypeInt64, 
MurmurHash3Impl<TYPE_BIGINT>>;
 using FunctionMurmurHash3_64_V2 =
         FunctionVariadicArgumentsBase<DataTypeInt64, 
MurmurHash3Impl<TYPE_BIGINT, true>>;
+using FunctionMurmurHash3U64V2 =
+        FunctionVariadicArgumentsBase<DataTypeInt128, 
MurmurHash3Impl<TYPE_LARGEINT, true>>;
 
 #ifdef BE_TEST
 const char* murmur_hash3_get_name_type_int_for_test() {
@@ -230,6 +234,7 @@ void register_function_hash(SimpleFunctionFactory& factory) 
{
     factory.register_function<FunctionMurmurHash3_32>();
     factory.register_function<FunctionMurmurHash3_64>();
     factory.register_function<FunctionMurmurHash3_64_V2>();
+    factory.register_function<FunctionMurmurHash3U64V2>();
     factory.register_function<FunctionXxHash_32>();
     factory.register_function<FunctionXxHash_64>();
     factory.register_alias("xxhash_64", "xxhash3_64");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index bb6bb4977e0..c9ba5ccc61a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -372,6 +372,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAl
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch;
@@ -938,6 +939,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(MurmurHash332.class, "murmur_hash3_32"),
             scalar(MurmurHash364.class, "murmur_hash3_64"),
             scalar(MurmurHash364V2.class, "murmur_hash3_64_v2"),
+            scalar(MurmurHash3U64V2.class, "murmur_hash3_u64_v2"),
             scalar(Negative.class, "negative"),
             scalar(NextDay.class, "next_day"),
             scalar(NonNullable.class, "non_nullable"),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java
new file mode 100644
index 00000000000..9ef87a50820
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MurmurHash3U64V2.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.LargeIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'murmur_hash3_u64_v2'.
+ */
+public class MurmurHash3U64V2 extends ScalarFunction
+        implements ExplicitlyCastableSignature, PropagateNullable {
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT),
+            
FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 or more arguments.
+     */
+    public MurmurHash3U64V2(Expression arg, Expression... varArgs) {
+        super("murmur_hash3_u64_v2", ExpressionUtils.mergeArguments(arg, 
varArgs));
+    }
+
+    /** constructor for withChildren and reuse signature */
+    private MurmurHash3U64V2(ScalarFunctionParams functionParams) {
+        super(functionParams);
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public MurmurHash3U64V2 withChildren(List<Expression> children) {
+        Preconditions.checkArgument(!children.isEmpty());
+        return new MurmurHash3U64V2(getFunctionParams(children));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitMurmurHash3U64V2(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index a3cca3e5585..520e6193f3f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -394,6 +394,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MultiSearchAl
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash332;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash364V2;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MurmurHash3U64V2;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Negative;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.NextDay;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.NgramSearch;
@@ -1989,6 +1990,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(murmurHash364V2, context);
     }
 
+    default R visitMurmurHash3U64V2(MurmurHash3U64V2 murmurHash3U64V2, C 
context) {
+        return visitScalarFunction(murmurHash3U64V2, context);
+    }
+
     default R visitXxHash32(XxHash32 xxHash32, C context) {
         return visitScalarFunction(xxHash32, context);
     }
diff --git a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
index e4d755e029e..764d533fcc7 100644
--- a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
+++ b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
@@ -29,6 +29,76 @@
 -- !mmh3_64_v2_4 --
 3669213779466221743
 
+-- !mmh3_64_v2_5 --
+-2648103510258542450
+
+-- !mmh3_64_v2_6 --
+-5640908359072688302
+
+-- !mmh3_u64_v2_1 --
+\N
+
+-- !mmh3_u64_v2_2 --
+4038800892574899471
+
+-- !mmh3_u64_v2_3 --
+5998619086395760910
+
+-- !mmh3_u64_v2_4 --
+3669213779466221743
+
+-- !mmh3_u64_v2_5 --
+15798640563451009166
+
+-- !mmh3_u64_v2_6 --
+12805835714636863314
+
+-- !mmh3_u64_v2_7 --
+0
+
+-- !mmh3_u64_v2_8 --
+9607679276477937801
+
+-- !mmh3_u64_v2_9 --
+17783800982478351481
+
+-- !mmh3_u64_v2_10 --
+10490885898849282672
+
+-- !mmh3_64_v2_table --
+1      4038800892574899471
+2      5998619086395760910
+3      \N
+4      0
+5      3669213779466221743
+6      -2648103510258542450
+7      -5640908359072688302
+8      5163374697039953916
+9      -1516026088323099476
+
+-- !mmh3_u64_v2_table --
+1      4038800892574899471
+2      5998619086395760910
+3      \N
+4      0
+5      3669213779466221743
+6      15798640563451009166
+7      12805835714636863314
+8      5163374697039953916
+9      16930717985386452140
+
+-- !mmh3_64_v2_fold_1 --
+-6017608668500074082
+
+-- !mmh3_64_v2_fold_2 --
+-4107623306750946434
+
+-- !mmh3_u64_v2_fold_1 --
+12429135405209477534
+
+-- !mmh3_u64_v2_fold_2 --
+14339120766958605182
+
 -- !sql --
 \N
 
@@ -46,3 +116,4 @@
 
 -- !sql --
 7001965798170371843
+
diff --git a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
index 74acd20a998..ace3379c99e 100644
--- a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
@@ -32,6 +32,73 @@ suite("test_hash_function", "arrow_flight_sql") {
     qt_mmh3_64_v2_2 "SELECT MURMUR_HASH3_64_V2('1000209601_1756808272');"
     qt_mmh3_64_v2_3 "SELECT MURMUR_HASH3_64_V2('hello world');"
     qt_mmh3_64_v2_4 "SELECT MURMUR_HASH3_64_V2('apache doris');"
+    qt_mmh3_64_v2_5 "SELECT MURMUR_HASH3_64_V2('1013199993_1756808272');"
+    qt_mmh3_64_v2_6 "SELECT MURMUR_HASH3_64_V2('1020273884_1756808272');"
+
+    // murmur_hash3_u64_v2 tests
+    qt_mmh3_u64_v2_1 "SELECT MURMUR_HASH3_U64_V2(NULL);"
+    qt_mmh3_u64_v2_2 "SELECT MURMUR_HASH3_U64_V2('1000209601_1756808272');"
+    qt_mmh3_u64_v2_3 "SELECT MURMUR_HASH3_U64_V2('hello world');"
+    qt_mmh3_u64_v2_4 "SELECT MURMUR_HASH3_U64_V2('apache doris');"
+    qt_mmh3_u64_v2_5 "SELECT MURMUR_HASH3_U64_V2('1013199993_1756808272');"
+    qt_mmh3_u64_v2_6 "SELECT MURMUR_HASH3_U64_V2('1020273884_1756808272');"
+    qt_mmh3_u64_v2_7 "SELECT MURMUR_HASH3_U64_V2('');"
+    qt_mmh3_u64_v2_8 "SELECT MURMUR_HASH3_U64_V2('a');"
+    qt_mmh3_u64_v2_9 "SELECT MURMUR_HASH3_U64_V2('hello', 'world');"
+    qt_mmh3_u64_v2_10 "SELECT MURMUR_HASH3_U64_V2('hello', 'world', '!');"
+
+    // Validation: murmur_hash3_u64_v2 should equal (murmur_hash3_64_v2 & 2^64-1)
+    def validate_mmh3_u64_v2 = { String... args ->
+        def argList = args.collect { "'${it}'" }.join(', ')
+        def u64_res = sql "SELECT MURMUR_HASH3_U64_V2(${argList});"
+        def v2_masked = sql "SELECT CAST(MURMUR_HASH3_64_V2(${argList}) AS 
LARGEINT) & 18446744073709551615;"
+        assertEquals(u64_res, v2_masked);
+    }
+
+    validate_mmh3_u64_v2('1000209601_1756808272');
+    validate_mmh3_u64_v2('hello world');
+    validate_mmh3_u64_v2('apache doris');
+    validate_mmh3_u64_v2('1013199993_1756808272');
+    validate_mmh3_u64_v2('1020273884_1756808272');
+    validate_mmh3_u64_v2('');
+    validate_mmh3_u64_v2('a');
+    validate_mmh3_u64_v2('你好🤣');
+    validate_mmh3_u64_v2('アパッチドリス');
+
+    // Table-based tests for mmh3_64_v2 and mmh3_u64_v2
+    sql "DROP TABLE IF EXISTS test_hash_tbl;"
+    sql """
+        CREATE TABLE test_hash_tbl (
+            id INT,
+            str_col VARCHAR(100)
+        ) DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_num" = "1");
+    """
+
+    sql """
+        INSERT INTO test_hash_tbl VALUES
+        (1, '1000209601_1756808272'),
+        (2, 'hello world'),
+        (3, NULL),
+        (4, ''),
+        (5, 'apache doris'),
+        (6, '1013199993_1756808272'),
+        (7, '1020273884_1756808272'),
+        (8, '你好🤣'),
+        (9, 'アパッチドリス');
+    """
+
+    qt_mmh3_64_v2_table "SELECT id, MURMUR_HASH3_64_V2(str_col) FROM 
test_hash_tbl ORDER BY id;"
+    qt_mmh3_u64_v2_table "SELECT id, MURMUR_HASH3_U64_V2(str_col) FROM 
test_hash_tbl ORDER BY id;"
+
+    sql "DROP TABLE IF EXISTS test_hash_tbl;"
+
+    // Constant folding tests
+    qt_mmh3_64_v2_fold_1 "SELECT MURMUR_HASH3_64_V2('test') + 1;"
+    qt_mmh3_64_v2_fold_2 "SELECT MURMUR_HASH3_64_V2('a', 'b') * 2;"
+    qt_mmh3_u64_v2_fold_1 "SELECT MURMUR_HASH3_U64_V2('test') + 1;"
+    qt_mmh3_u64_v2_fold_2 "SELECT MURMUR_HASH3_U64_V2('a', 'b') * 2;"
 
     qt_sql "SELECT xxhash_32(null);"
     qt_sql "SELECT xxhash_32(\"hello\");"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to