This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8f77e6363a242b6153ff38c00f557942a9cfbb07
Author: yangshijie <sjyang2...@zju.edu.cn>
AuthorDate: Fri Feb 23 11:07:50 2024 +0800

    [Feature](function) Support xxhash function like murmur hash function 
(#31193)
---
 be/src/util/hash_util.hpp                          |  11 ++
 be/src/vec/functions/function_hash.cpp             | 153 +++++++++++++--------
 be/test/vec/function/function_hash_test.cpp        |  64 +++++++++
 .../hash-functions/murmur-hash3-32.md              |   2 +
 .../hash-functions/murmur-hash3-64.md              |   2 +
 .../{murmur-hash3-32.md => xxhash-32.md}           |  52 +++----
 .../sql-functions/hash-functions/xxhash-64.md      |  85 ++++++++++++
 docs/sidebars.json                                 |   4 +-
 .../hash-functions/murmur-hash3-32.md              |   4 +-
 .../hash-functions/murmur-hash3-64.md              |   4 +-
 .../{murmur-hash3-32.md => xxhash-32.md}           |  53 +++----
 .../sql-functions/hash-functions/xxhash-64.md      |  84 +++++++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |   4 +
 .../expressions/functions/scalar/XxHash32.java     |  72 ++++++++++
 .../expressions/functions/scalar/XxHash64.java     |  72 ++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |  10 ++
 gensrc/script/doris_builtins_functions.py          |   6 +-
 .../hash_functions/test_hash_function.out          |  17 +++
 .../hash_functions/test_hash_function.out          |  17 +++
 .../hash_functions/test_hash_function.groovy       |   8 ++
 .../hash_functions/test_hash_function.groovy       |   8 ++
 21 files changed, 617 insertions(+), 115 deletions(-)

diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index afa8a145386..402797a8e35 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -335,6 +335,16 @@ public:
 #endif
     // xxHash function for a byte array.  For convenience, a 64-bit seed is 
also
     // hashed into the result.  The mapping may change from time to time.
+    static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) {
+        return XXH32(s, len, seed);
+    }
+
+    // Same as the function above, but hashes a null value (represented as a zero int).
+    static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) {
+        static const int INT_VALUE = 0;
+        return XXH32(reinterpret_cast<const char*>(&INT_VALUE), sizeof(int), 
seed);
+    }
+
     static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) {
         return XXH3_64bits_withSeed(s, len, seed);
     }
@@ -344,6 +354,7 @@ public:
         static const int INT_VALUE = 0;
         return XXH3_64bits_withSeed(reinterpret_cast<const char*>(&INT_VALUE), 
sizeof(int), seed);
     }
+
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
diff --git a/be/src/vec/functions/function_hash.cpp 
b/be/src/vec/functions/function_hash.cpp
index cb8dfc09434..195dff94836 100644
--- a/be/src/vec/functions/function_hash.cpp
+++ b/be/src/vec/functions/function_hash.cpp
@@ -41,21 +41,82 @@ namespace doris::vectorized {
 constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c;
 
 template <typename ReturnType>
-struct MurmurHash3ImplName {};
+struct MurmurHash3Impl {
+    static constexpr auto name =
+            std::is_same_v<ReturnType, Int32> ? "murmur_hash3_32" : 
"murmur_hash3_64";
 
-template <>
-struct MurmurHash3ImplName<Int32> {
-    static constexpr auto name = "murmur_hash3_32";
-};
+    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
+        ColumnVector<ReturnType>& vec_to = 
assert_cast<ColumnVector<ReturnType>&>(icolumn);
+        vec_to.get_data().assign(input_rows_count, 
static_cast<ReturnType>(emtpy_value));
+        return Status::OK();
+    }
+
+    static Status first_apply(const IDataType* type, const IColumn* column, 
size_t input_rows_count,
+                              IColumn& icolumn) {
+        return execute<true>(type, column, input_rows_count, icolumn);
+    }
+
+    static Status combine_apply(const IDataType* type, const IColumn* column,
+                                size_t input_rows_count, IColumn& icolumn) {
+        return execute<false>(type, column, input_rows_count, icolumn);
+    }
 
-template <>
-struct MurmurHash3ImplName<Int64> {
-    static constexpr auto name = "murmur_hash3_64";
+    template <bool first>
+    static Status execute(const IDataType* type, const IColumn* column, size_t 
input_rows_count,
+                          IColumn& col_to) {
+        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
+        if constexpr (first) {
+            if constexpr (std::is_same_v<ReturnType, Int32>) {
+                to_column.fill(static_cast<Int32>(HashUtil::MURMUR3_32_SEED), 
input_rows_count);
+            } else {
+                to_column.insert_many_defaults(input_rows_count);
+            }
+        }
+        auto& col_to_data = to_column.get_data();
+        if (const auto* col_from = check_and_get_column<ColumnString>(column)) 
{
+            const typename ColumnString::Chars& data = col_from->get_chars();
+            const typename ColumnString::Offsets& offsets = 
col_from->get_offsets();
+            size_t size = offsets.size();
+            ColumnString::Offset current_offset = 0;
+            for (size_t i = 0; i < size; ++i) {
+                if constexpr (std::is_same_v<ReturnType, Int32>) {
+                    col_to_data[i] = HashUtil::murmur_hash3_32(
+                            reinterpret_cast<const 
char*>(&data[current_offset]),
+                            offsets[i] - current_offset, col_to_data[i]);
+                } else {
+                    murmur_hash3_x64_64(reinterpret_cast<const 
char*>(&data[current_offset]),
+                                        offsets[i] - current_offset, 
col_to_data[i],
+                                        col_to_data.data() + i);
+                }
+                current_offset = offsets[i];
+            }
+        } else if (const ColumnConst* col_from_const =
+                           
check_and_get_column_const_string_or_fixedstring(column)) {
+            auto value = col_from_const->get_value<String>();
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                if constexpr (std::is_same_v<ReturnType, Int32>) {
+                    col_to_data[i] =
+                            HashUtil::murmur_hash3_32(value.data(), 
value.size(), col_to_data[i]);
+                } else {
+                    murmur_hash3_x64_64(value.data(), value.size(), 
col_to_data[i],
+                                        col_to_data.data() + i);
+                }
+            }
+        } else {
+            DCHECK(false);
+            return Status::NotSupported("Illegal column {} of argument of 
function {}",
+                                        column->get_name(), name);
+        }
+        return Status::OK();
+    }
 };
 
+using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase<DataTypeInt32, 
MurmurHash3Impl<Int32>>;
+using FunctionMurmurHash3_64 = FunctionVariadicArgumentsBase<DataTypeInt64, 
MurmurHash3Impl<Int64>>;
+
 template <typename ReturnType>
-struct MurmurHash3Impl {
-    static constexpr auto name = MurmurHash3ImplName<ReturnType>::name;
+struct XxHashImpl {
+    static constexpr auto name = std::is_same_v<ReturnType, Int32> ? 
"xxhash_32" : "xxhash_64";
 
     static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
         ColumnVector<ReturnType>& vec_to = 
assert_cast<ColumnVector<ReturnType>&>(icolumn);
@@ -76,40 +137,25 @@ struct MurmurHash3Impl {
     template <bool first>
     static Status execute(const IDataType* type, const IColumn* column, size_t 
input_rows_count,
                           IColumn& col_to) {
-        auto* col_to_data = 
assert_cast<ColumnVector<ReturnType>&>(col_to).get_data().data();
+        auto& to_column = assert_cast<ColumnVector<ReturnType>&>(col_to);
+        if constexpr (first) {
+            to_column.insert_many_defaults(input_rows_count);
+        }
+        auto& col_to_data = to_column.get_data();
         if (const auto* col_from = check_and_get_column<ColumnString>(column)) 
{
             const typename ColumnString::Chars& data = col_from->get_chars();
             const typename ColumnString::Offsets& offsets = 
col_from->get_offsets();
             size_t size = offsets.size();
-
             ColumnString::Offset current_offset = 0;
             for (size_t i = 0; i < size; ++i) {
-                if (first) {
-                    if constexpr (std::is_same_v<ReturnType, Int32>) {
-                        UInt32 val = HashUtil::murmur_hash3_32(
-                                reinterpret_cast<const 
char*>(&data[current_offset]),
-                                offsets[i] - current_offset, 
HashUtil::MURMUR3_32_SEED);
-                        col_to.insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&val)),
-                                           0);
-                    } else {
-                        UInt64 val = 0;
-                        murmur_hash3_x64_64(reinterpret_cast<const 
char*>(&data[current_offset]),
-                                            offsets[i] - current_offset, 0, 
&val);
-                        col_to.insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&val)),
-                                           0);
-                    }
+                if constexpr (std::is_same_v<ReturnType, Int32>) {
+                    col_to_data[i] = HashUtil::xxHash32WithSeed(
+                            reinterpret_cast<const 
char*>(&data[current_offset]),
+                            offsets[i] - current_offset, col_to_data[i]);
                 } else {
-                    if constexpr (std::is_same_v<ReturnType, Int32>) {
-                        col_to_data[i] = HashUtil::murmur_hash3_32(
-                                reinterpret_cast<const 
char*>(&data[current_offset]),
-                                offsets[i] - current_offset,
-                                
assert_cast<ColumnInt32&>(col_to).get_data()[i]);
-                    } else {
-                        murmur_hash3_x64_64(reinterpret_cast<const 
char*>(&data[current_offset]),
-                                            offsets[i] - current_offset,
-                                            
assert_cast<ColumnInt64&>(col_to).get_data()[i],
-                                            col_to_data + i);
-                    }
+                    col_to_data[i] = HashUtil::xxHash64WithSeed(
+                            reinterpret_cast<const 
char*>(&data[current_offset]),
+                            offsets[i] - current_offset, col_to_data[i]);
                 }
                 current_offset = offsets[i];
             }
@@ -117,28 +163,12 @@ struct MurmurHash3Impl {
                            
check_and_get_column_const_string_or_fixedstring(column)) {
             auto value = col_from_const->get_value<String>();
             for (size_t i = 0; i < input_rows_count; ++i) {
-                if (first) {
-                    if constexpr (std::is_same_v<ReturnType, Int32>) {
-                        UInt32 val = HashUtil::murmur_hash3_32(value.data(), 
value.size(),
-                                                               
HashUtil::MURMUR3_32_SEED);
-                        col_to.insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&val)),
-                                           0);
-                    } else {
-                        UInt64 val = 0;
-                        murmur_hash3_x64_64(value.data(), value.size(), 0, 
&val);
-                        col_to.insert_data(const_cast<const 
char*>(reinterpret_cast<char*>(&val)),
-                                           0);
-                    }
+                if constexpr (std::is_same_v<ReturnType, Int32>) {
+                    col_to_data[i] =
+                            HashUtil::xxHash32WithSeed(value.data(), 
value.size(), col_to_data[i]);
                 } else {
-                    if constexpr (std::is_same_v<ReturnType, Int32>) {
-                        col_to_data[i] = HashUtil::murmur_hash3_32(
-                                value.data(), value.size(),
-                                
assert_cast<ColumnInt32&>(col_to).get_data()[i]);
-                    } else {
-                        murmur_hash3_x64_64(value.data(), value.size(),
-                                            
assert_cast<ColumnInt64&>(col_to).get_data()[i],
-                                            col_to_data + i);
-                    }
+                    col_to_data[i] =
+                            HashUtil::xxHash64WithSeed(value.data(), 
value.size(), col_to_data[i]);
                 }
             }
         } else {
@@ -149,11 +179,14 @@ struct MurmurHash3Impl {
         return Status::OK();
     }
 };
-using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase<DataTypeInt32, 
MurmurHash3Impl<Int32>>;
-using FunctionMurmurHash3_64 = FunctionVariadicArgumentsBase<DataTypeInt64, 
MurmurHash3Impl<Int64>>;
+
+using FunctionXxHash_32 = FunctionVariadicArgumentsBase<DataTypeInt32, 
XxHashImpl<Int32>>;
+using FunctionXxHash_64 = FunctionVariadicArgumentsBase<DataTypeInt64, 
XxHashImpl<Int64>>;
 
 void register_function_hash(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionMurmurHash3_32>();
     factory.register_function<FunctionMurmurHash3_64>();
+    factory.register_function<FunctionXxHash_32>();
+    factory.register_function<FunctionXxHash_64>();
 }
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/function/function_hash_test.cpp 
b/be/test/vec/function/function_hash_test.cpp
index 10c57d1c31f..4d2cf6be4b4 100644
--- a/be/test/vec/function/function_hash_test.cpp
+++ b/be/test/vec/function/function_hash_test.cpp
@@ -94,4 +94,68 @@ TEST(HashFunctionTest, murmur_hash_3_64_test) {
     };
 }
 
+TEST(HashFunctionTest, xxhash_32_test) {
+    std::string func_name = "xxhash_32";
+
+    {
+        InputTypeSet input_types = {TypeIndex::String};
+
+        DataSet data_set = {{{Null()}, Null()}, {{std::string("hello")}, 
(int32_t)-83855367}};
+
+        static_cast<void>(check_function<DataTypeInt32, true>(func_name, 
input_types, data_set));
+    };
+
+    {
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+        DataSet data_set = {{{std::string("hello"), std::string("world")}, 
(int32_t)-920844969},
+                            {{std::string("hello"), Null()}, Null()}};
+
+        static_cast<void>(check_function<DataTypeInt32, true>(func_name, 
input_types, data_set));
+    };
+
+    {
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, 
TypeIndex::String};
+
+        DataSet data_set = {{{std::string("hello"), std::string("world"), 
std::string("!")},
+                             (int32_t)352087701},
+                            {{std::string("hello"), std::string("world"), 
Null()}, Null()}};
+
+        static_cast<void>(check_function<DataTypeInt32, true>(func_name, 
input_types, data_set));
+    };
+}
+
+TEST(HashFunctionTest, xxhash_64_test) {
+    std::string func_name = "xxhash_64";
+
+    {
+        InputTypeSet input_types = {TypeIndex::String};
+
+        DataSet data_set = {{{Null()}, Null()},
+                            {{std::string("hello")}, 
(int64_t)-7685981735718036227}};
+
+        static_cast<void>(check_function<DataTypeInt64, true>(func_name, 
input_types, data_set));
+    };
+
+    {
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+        DataSet data_set = {
+                {{std::string("hello"), std::string("world")}, 
(int64_t)7001965798170371843},
+                {{std::string("hello"), Null()}, Null()}};
+
+        static_cast<void>(check_function<DataTypeInt64, true>(func_name, 
input_types, data_set));
+    };
+
+    {
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, 
TypeIndex::String};
+
+        DataSet data_set = {{{std::string("hello"), std::string("world"), 
std::string("!")},
+                             (int64_t)6796829678999971400},
+                            {{std::string("hello"), std::string("world"), 
Null()}, Null()}};
+
+        static_cast<void>(check_function<DataTypeInt64, true>(func_name, 
input_types, data_set));
+    };
+}
+
 } // namespace doris::vectorized
diff --git 
a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md 
b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
index 7610d4ea27d..051a5c262ff 100644
--- a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
+++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
@@ -31,6 +31,8 @@ under the License.
 
 Return the 32 bits murmur3 hash of input string.
 
+Note: When calculating hash values, it is recommended to use `xxhash_32` 
instead of `murmur_hash3_32`.
+
 ### example
 
 ```
diff --git 
a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md 
b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
index d1965f3ed01..fb9d1dd6217 100644
--- a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
+++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
@@ -31,6 +31,8 @@ under the License.
 
 Return the 64 bits murmur3 hash of input string.
 
+Note: When calculating hash values, it is recommended to use `xxhash_64` 
instead of `murmur_hash3_64`.
+
 ### example
 
 ```
diff --git 
a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md 
b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
similarity index 53%
copy from 
docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
copy to docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
index 7610d4ea27d..3707d7a70c3 100644
--- a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
+++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "MURMUR_HASH3_32",
+    "title": "XXHASH_32",
     "language": "en"
 }
 ---
@@ -22,40 +22,42 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-## murmur_hash3_32
+## xxhash_32
 
 ### description
 #### Syntax
 
-`INT MURMUR_HASH3_32(VARCHAR input, ...)`
+`INT XXHASH_32(VARCHAR input, ...)`
 
-Return the 32 bits murmur3 hash of input string.
+Return the 32-bit xxhash of the input string.
+
+Note: When calculating hash values, it is recommended to use `xxhash_32` 
instead of `murmur_hash3_32`.
 
 ### example
 
 ```
-mysql> select murmur_hash3_32(null);
-+-----------------------+
-| murmur_hash3_32(NULL) |
-+-----------------------+
-|                  NULL |
-+-----------------------+
-
-mysql> select murmur_hash3_32("hello");
-+--------------------------+
-| murmur_hash3_32('hello') |
-+--------------------------+
-|               1321743225 |
-+--------------------------+
-
-mysql> select murmur_hash3_32("hello", "world");
-+-----------------------------------+
-| murmur_hash3_32('hello', 'world') |
-+-----------------------------------+
-|                         984713481 |
-+-----------------------------------+
+mysql> select xxhash_32(NULL);
++-----------------+
+| xxhash_32(NULL) |
++-----------------+
+|            NULL |
++-----------------+
+
+mysql> select xxhash_32("hello");
++--------------------+
+| xxhash_32('hello') |
++--------------------+
+|          -83855367 |
++--------------------+
+
+mysql> select xxhash_32("hello", "world");
++-----------------------------+
+| xxhash_32('hello', 'world') |
++-----------------------------+
+|                  -920844969 |
++-----------------------------+
 ```
 
 ### keywords
 
-    MURMUR_HASH3_32,HASH
+XXHASH_32,HASH
diff --git a/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md 
b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md
new file mode 100644
index 00000000000..506613177e9
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md
@@ -0,0 +1,85 @@
+---
+{
+    "title": "XXHASH_64",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## xxhash_64
+
+### description
+#### Syntax
+
+`BIGINT XXHASH_64(VARCHAR input, ...)`
+
+Return the 64-bit xxhash of the input string.
+
+Note: When calculating hash values, it is recommended to use `xxhash_64` 
instead of `murmur_hash3_64`.
+
+### example
+
+```
+mysql> select xxhash_64(NULL);
++-----------------+
+| xxhash_64(NULL) |
++-----------------+
+|            NULL |
++-----------------+
+
+mysql> select xxhash_64("hello");
++----------------------+
+| xxhash_64('hello')   |
++----------------------+
+| -7685981735718036227 |
++----------------------+
+
+mysql> select xxhash_64("hello", "world");
++-----------------------------+
+| xxhash_64('hello', 'world') |
++-----------------------------+
+|         7001965798170371843 |
++-----------------------------+
+```
+
+### benchmark
+
+TPC-H benchmark testing shows that `xxhash_64` performs significantly better 
than `murmur_hash3_64`. Therefore, in scenarios where hash values need to be 
calculated, `xxhash_64` is the recommended choice.
+
+```
+mysql> select count(murmur_hash3_64(l_comment)) from lineitem;
++-----------------------------------+
+| count(murmur_hash3_64(l_comment)) |
++-----------------------------------+
+|                         600037902 |
++-----------------------------------+
+1 row in set (17.18 sec)
+
+mysql> select count(xxhash_64(l_comment)) from lineitem;
++-----------------------------+
+| count(xxhash_64(l_comment)) |
++-----------------------------+
+|                   600037902 |
++-----------------------------+
+1 row in set (8.41 sec)
+```
+
+### keywords
+
+XXHASH_64,HASH
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 48b3c3eea5f..0d07a890eac 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -708,7 +708,9 @@
                             "label": "Hash Functions",
                             "items": [
                                 
"sql-manual/sql-functions/hash-functions/murmur-hash3-32",
-                                
"sql-manual/sql-functions/hash-functions/murmur-hash3-64"
+                                
"sql-manual/sql-functions/hash-functions/murmur-hash3-64",
+                                
"sql-manual/sql-functions/hash-functions/xxhash-32",
+                                
"sql-manual/sql-functions/hash-functions/xxhash-64"
                             ]
                         },
                         {
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
index 93100700c7d..57c840293d7 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
@@ -29,7 +29,9 @@ under the License.
 
 `INT MURMUR_HASH3_32(VARCHAR input, ...)`
 
-返回输入字符串的32位murmur3 hash值
+返回输入字符串的32位murmur3 hash值。
+
+注:在计算hash值时,更推荐使用`xxhash_32`,而不是`murmur_hash3_32`。
 
 ### example
 
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
index 2a7f04d8f6c..e113d675898 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md
@@ -29,7 +29,9 @@ under the License.
 
 `BIGINT MURMUR_HASH3_64(VARCHAR input, ...)`
 
-返回输入字符串的64位murmur3 hash值
+返回输入字符串的64位murmur3 hash值。
+
+注:在计算hash值时,更推荐使用`xxhash_64`,而不是`murmur_hash3_64`。
 
 ### example
 
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
similarity index 53%
copy from 
docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
copy to docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
index 93100700c7d..9c839f90d8c 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md
@@ -1,6 +1,6 @@
 ---
 {
-    "title": "MURMUR_HASH3_32",
+    "title": "XXHASH_32",
     "language": "zh-CN"
 }
 ---
@@ -22,40 +22,41 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-## murmur_hash3_32
+## xxhash_32
 
 ### description
 #### Syntax
 
-`INT MURMUR_HASH3_32(VARCHAR input, ...)`
+`INT XXHASH_32(VARCHAR input, ...)`
 
-返回输入字符串的32位murmur3 hash值
+返回输入字符串的32位xxhash值。
+
+注:在计算hash值时,更推荐使用`xxhash_32`,而不是`murmur_hash3_32`。
 
 ### example
 
 ```
-mysql> select murmur_hash3_32(null);
-+-----------------------+
-| murmur_hash3_32(NULL) |
-+-----------------------+
-|                  NULL |
-+-----------------------+
-
-mysql> select murmur_hash3_32("hello");
-+--------------------------+
-| murmur_hash3_32('hello') |
-+--------------------------+
-|               1321743225 |
-+--------------------------+
-
-mysql> select murmur_hash3_32("hello", "world");
-+-----------------------------------+
-| murmur_hash3_32('hello', 'world') |
-+-----------------------------------+
-|                         984713481 |
-+-----------------------------------+
+mysql> select xxhash_32(NULL);
++-----------------+
+| xxhash_32(NULL) |
++-----------------+
+|            NULL |
++-----------------+
+
+mysql> select xxhash_32("hello");
++--------------------+
+| xxhash_32('hello') |
++--------------------+
+|          -83855367 |
++--------------------+
+
+mysql> select xxhash_32("hello", "world");
++-----------------------------+
+| xxhash_32('hello', 'world') |
++-----------------------------+
+|                  -920844969 |
++-----------------------------+
 ```
 
 ### keywords
-
-    MURMUR_HASH3_32,HASH
+XXHASH_32,HASH
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md
new file mode 100644
index 00000000000..065e9242334
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md
@@ -0,0 +1,84 @@
+---
+{
+    "title": "XXHASH_64",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## xxhash_64
+
+### description
+#### Syntax
+
+`BIGINT XXHASH_64(VARCHAR input, ...)`
+
+返回输入字符串的64位xxhash值。
+
+注:在计算hash值时,更推荐使用`xxhash_64`,而不是`murmur_hash3_64`。
+
+### example
+
+```
+mysql> select xxhash_64(NULL);
++-----------------+
+| xxhash_64(NULL) |
++-----------------+
+|            NULL |
++-----------------+
+
+mysql> select xxhash_64("hello");
++----------------------+
+| xxhash_64('hello')   |
++----------------------+
+| -7685981735718036227 |
++----------------------+
+
+mysql> select xxhash_64("hello", "world");
++-----------------------------+
+| xxhash_64('hello', 'world') |
++-----------------------------+
+|         7001965798170371843 |
++-----------------------------+
+```
+### benchmark
+
+通过TPCH 
Benchmark测试发现,`xxhash_64`相比`murmur_hash3_64`来说性能大幅提升,因此在需要计算hash值的场景下,更推荐使用`xxhash_64`。
+
+```
+mysql> select count(murmur_hash3_64(l_comment)) from lineitem;
++-----------------------------------+
+| count(murmur_hash3_64(l_comment)) |
++-----------------------------------+
+|                         600037902 |
++-----------------------------------+
+1 row in set (17.18 sec)
+
+mysql> select count(xxhash_64(l_comment)) from lineitem;
++-----------------------------+
+| count(xxhash_64(l_comment)) |
++-----------------------------+
+|                   600037902 |
++-----------------------------+
+1 row in set (8.41 sec)
+```
+
+### keywords
+
+XXHASH_64,HASH
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 1ace763675f..f5928650efb 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -426,6 +426,8 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksAdd;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.YearCeil;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.YearFloor;
@@ -876,6 +878,8 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(WeeksDiff.class, "weeks_diff"),
             scalar(WeeksSub.class, "weeks_sub"),
             scalar(WidthBucket.class, "width_bucket"),
+            scalar(XxHash32.class, "xxhash_32"),
+            scalar(XxHash64.class, "xxhash_64"),
             scalar(Year.class, "year"),
             scalar(YearCeil.class, "year_ceil"),
             scalar(YearFloor.class, "year_floor"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java
new file mode 100644
index 00000000000..149c2cbc766
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'xxhash_32'.
+ */
+public class XxHash32 extends ScalarFunction
+        implements ExplicitlyCastableSignature, PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(IntegerType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(IntegerType.INSTANCE).varArgs(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 or more arguments.
+     */
+    public XxHash32(Expression arg, Expression... varArgs) {
+        super("xxhash_32", ExpressionUtils.mergeArguments(arg, varArgs));
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public XxHash32 withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() >= 1);
+        return new XxHash32(children.get(0),
+                children.subList(1, children.size()).toArray(new Expression[0]));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitXxHash32(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java
new file mode 100644
index 00000000000..bc23d8c2a5b
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'xxhash_64'.
+ */
+public class XxHash64 extends ScalarFunction
+        implements ExplicitlyCastableSignature, PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(BigIntType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(BigIntType.INSTANCE).varArgs(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 or more arguments.
+     */
+    public XxHash64(Expression arg, Expression... varArgs) {
+        super("xxhash_64", ExpressionUtils.mergeArguments(arg, varArgs));
+    }
+
+    /**
+     * withChildren.
+     */
+    @Override
+    public XxHash64 withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() >= 1);
+        return new XxHash64(children.get(0),
+                children.subList(1, children.size()).toArray(new Expression[0]));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitXxHash64(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 9a1ed840482..183b4a73dac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -416,6 +416,8 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksAdd;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.YearCeil;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.YearFloor;
@@ -1515,6 +1517,14 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(murmurHash364, context);
     }
 
+    default R visitXxHash32(XxHash32 xxHash32, C context) {
+        return visitScalarFunction(xxHash32, context);
+    }
+
+    default R visitXxHash64(XxHash64 xxHash64, C context) {
+        return visitScalarFunction(xxHash64, context);
+    }
+
     default R visitNegative(Negative negative, C context) {
         return visitScalarFunction(negative, context);
     }
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 1277f72db60..0ecb05612ac 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1885,7 +1885,11 @@ visible_functions = {
         [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'], ''],
         [['murmur_hash3_32'], 'INT', ['STRING', '...'], ''],
         [['murmur_hash3_64'], 'BIGINT', ['VARCHAR', '...'], ''],
-        [['murmur_hash3_64'], 'BIGINT', ['STRING', '...'], '']
+        [['murmur_hash3_64'], 'BIGINT', ['STRING', '...'], ''],
+        [['xxhash_32'], 'INT', ['VARCHAR', '...'], ''],
+        [['xxhash_32'], 'INT', ['STRING', '...'], ''],
+        [['xxhash_64'], 'BIGINT', ['VARCHAR', '...'], ''],
+        [['xxhash_64'], 'BIGINT', ['STRING', '...'], '']
     ],
 
     # aes and base64 function
diff --git a/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out
index 221936613d3..984075ddeff 100644
--- a/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out
+++ b/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out
@@ -17,3 +17,20 @@
 -- !sql --
 3583109472027628045
 
+-- !sql --
+\N
+
+-- !sql --
+-83855367
+
+-- !sql --
+-920844969
+
+-- !sql --
+\N
+
+-- !sql --
+-7685981735718036227
+
+-- !sql --
+7001965798170371843
diff --git a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
index 221936613d3..984075ddeff 100644
--- a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
+++ b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out
@@ -17,3 +17,20 @@
 -- !sql --
 3583109472027628045
 
+-- !sql --
+\N
+
+-- !sql --
+-83855367
+
+-- !sql --
+-920844969
+
+-- !sql --
+\N
+
+-- !sql --
+-7685981735718036227
+
+-- !sql --
+7001965798170371843
diff --git a/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy
index ae805f904c3..8cae71a2793 100644
--- a/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy
+++ b/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy
@@ -26,4 +26,12 @@ suite("test_hash_function") {
     qt_sql "SELECT murmur_hash3_64(null);"
     qt_sql "SELECT murmur_hash3_64(\"hello\");"
     qt_sql "SELECT murmur_hash3_64(\"hello\", \"world\");"
+
+    qt_sql "SELECT xxhash_32(null);"
+    qt_sql "SELECT xxhash_32(\"hello\");"
+    qt_sql "SELECT xxhash_32(\"hello\", \"world\");"
+
+    qt_sql "SELECT xxhash_64(null);"
+    qt_sql "SELECT xxhash_64(\"hello\");"
+    qt_sql "SELECT xxhash_64(\"hello\", \"world\");"
 }
diff --git a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
index d44518509da..d547e9fb287 100644
--- a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy
@@ -25,4 +25,12 @@ suite("test_hash_function", "arrow_flight_sql") {
     qt_sql "SELECT murmur_hash3_64(null);"
     qt_sql "SELECT murmur_hash3_64(\"hello\");"
     qt_sql "SELECT murmur_hash3_64(\"hello\", \"world\");"
+
+    qt_sql "SELECT xxhash_32(null);"
+    qt_sql "SELECT xxhash_32(\"hello\");"
+    qt_sql "SELECT xxhash_32(\"hello\", \"world\");"
+
+    qt_sql "SELECT xxhash_64(null);"
+    qt_sql "SELECT xxhash_64(\"hello\");"
+    qt_sql "SELECT xxhash_64(\"hello\", \"world\");"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to