This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c1440cb9d68 [opt](hll) Use auto vectorization for HLL registers merge function (#54539)
c1440cb9d68 is described below

commit c1440cb9d68bcc4f9555b8b85f2eb044552e51ec
Author: Zhiguo Wu <[email protected]>
AuthorDate: Sun Aug 24 00:38:19 2025 +0800

    [opt](hll) Use auto vectorization for HLL registers merge function (#54539)
    
    Use auto vectorization to optimize HLL registers merge.
---
 be/benchmark/benchmark_hll_merge.hpp | 60 ++++++++++++++++++++++++++++++++++++
 be/benchmark/benchmark_main.cpp      |  1 +
 be/src/olap/hll.h                    | 24 +++------------
 bin/start_be.sh                      | 16 ++++++++--
 4 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/be/benchmark/benchmark_hll_merge.hpp b/be/benchmark/benchmark_hll_merge.hpp
new file mode 100644
index 00000000000..d923d208fe4
--- /dev/null
+++ b/be/benchmark/benchmark_hll_merge.hpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+
+#include "olap/hll.h"
+#include "util/hash_util.hpp"
+
+namespace doris {
+    // Hashes the 8 bytes of `value` with HashUtil::murmur_hash64A, passing 0 as
+    // the last argument (presumably the seed -- confirm in util/hash_util.hpp).
+    static uint64_t hash(uint64_t value) {
+        const uint64_t digest = HashUtil::murmur_hash64A(&value, 8, 0);
+        return digest;
+    }
+
+    // Builds the two HyperLogLog inputs used by the merge benchmark.
+    // The first sketch is updated with hash(0) .. hash(64 * 1024 - 1) and the
+    // second with hash(1) .. hash(64 * 1024), so their inputs overlap in all
+    // but one value each. Returns them as {first, second}.
+    static std::pair<HyperLogLog, HyperLogLog> prepare_test_data() {
+        constexpr int kValueCount = 64 * 1024;
+        HyperLogLog first;
+        HyperLogLog second;
+        int n = 0;
+        while (n < kValueCount) {
+            first.update(hash(n));
+            second.update(hash(n + 1));
+            ++n;
+        }
+        return {std::move(first), std::move(second)};
+    }
+} // namespace doris
+
+// Shared benchmark inputs. They are produced once by doris::prepare_test_data()
+// when this namespace-scope static is initialized, not inside a timed loop.
+static auto [hll1, hll2] = doris::prepare_test_data();
+
+// Benchmarks HyperLogLog::merge on the two pre-built inputs. Each iteration
+// copies hll1 and merges hll2 into that copy, so the reported time covers one
+// copy construction plus one merge.
+static void BM_HllMerge(benchmark::State& state) {
+    for (auto _ : state) {
+        // merge() is applied to a fresh copy so that every iteration starts
+        // from the same state as hll1.
+        doris::HyperLogLog merged(hll1);
+
+        // hll2 is only the source operand, so it is passed directly; copying
+        // it on every iteration only added unrelated work to the timed region.
+        // NOTE(review): assumes merge() does not modify its argument --
+        // confirm against the declaration in olap/hll.h.
+        merged.merge(hll2);
+
+        // Keep the merged value observable so the compiler cannot discard it.
+        benchmark::DoNotOptimize(merged);
+    }
+}
+
+// Registers BM_HllMerge with the benchmark runner: time unit is nanoseconds,
+// 5 repetitions, aggregates-only display, and two custom statistics ("min" and
+// "max") computed as the smallest / largest element of the values vector
+// passed to each callback.
+BENCHMARK(BM_HllMerge)
+        ->Unit(benchmark::kNanosecond)
+        ->Repetitions(5)
+        ->DisplayAggregatesOnly()
+        ->ComputeStatistics("min",
+                            [](const std::vector<double>& samples) {
+                                return *std::min_element(samples.cbegin(), samples.cend());
+                            })
+        ->ComputeStatistics("max", [](const std::vector<double>& samples) {
+            return *std::max_element(samples.cbegin(), samples.cend());
+        });
diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp
index dbcce3231c4..950c55b5883 100644
--- a/be/benchmark/benchmark_main.cpp
+++ b/be/benchmark/benchmark_main.cpp
@@ -19,6 +19,7 @@
 
 #include "benchmark_bit_pack.hpp"
 #include "benchmark_fastunion.hpp"
+#include "benchmark_hll_merge.hpp"
 #include "binary_cast_benchmark.hpp"
 #include "vec/columns/column_string.h"
 #include "vec/core/block.h"
diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h
index 776289c7588..2b004591c70 100644
--- a/be/src/olap/hll.h
+++ b/be/src/olap/hll.h
@@ -23,10 +23,6 @@
 #include <string>
 #include <utility>
 
-#ifdef __x86_64__
-#include <immintrin.h>
-#endif
-
 #include "vec/common/hash_table/phmap_fwd_decl.h"
 
 namespace doris {
@@ -276,23 +272,13 @@ private:
 
     // absorb other registers into this registers
     void _merge_registers(const uint8_t* other_registers) {
-#ifdef __AVX2__
-        int loop = HLL_REGISTERS_COUNT / 32; // 32 = 256/8
-        uint8_t* dst = _registers;
-        const uint8_t* src = other_registers;
-        for (int i = 0; i < loop; i++) {
-            __m256i xa = _mm256_loadu_si256((const __m256i*)dst);
-            __m256i xb = _mm256_loadu_si256((const __m256i*)src);
-            _mm256_storeu_si256((__m256i*)dst, _mm256_max_epu8(xa, xb));
-            src += 32;
-            dst += 32;
-        }
-#else
+        // The element-wise merge is delegated to a helper whose pointer
+        // parameters are __restrict-qualified.
+        _do_simd_merge(_registers, other_registers);
+    }
+
+    // Stores the larger of registers[i] and other_registers[i] into
+    // registers[i] for every i in [0, HLL_REGISTERS_COUNT). Written as a plain
+    // scalar loop over __restrict-qualified pointers, with the intent that the
+    // compiler auto-vectorizes it.
+    // NOTE(review): __restrict is a promise that the two ranges do not overlap;
+    // confirm no caller can pass this object's own _registers as
+    // other_registers (e.g. merging an HLL into itself).
+    void _do_simd_merge(uint8_t* __restrict registers, const uint8_t* __restrict other_registers) {
         for (int i = 0; i < HLL_REGISTERS_COUNT; ++i) {
-            _registers[i] =
-                    (_registers[i] < other_registers[i] ? other_registers[i] : _registers[i]);
+            registers[i] = (registers[i] < other_registers[i] ? other_registers[i] : registers[i]);
         }
-#endif
     }
 
     HllDataType _type = HLL_DATA_EMPTY;
diff --git a/bin/start_be.sh b/bin/start_be.sh
index f2709cb5a25..ebfb45de300 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -33,6 +33,7 @@ OPTS="$(getopt \
     -l 'console' \
     -l 'version' \
     -l 'benchmark' \
+    -l 'benchmark_filter:' \
     -- "$@")"
 
 eval set -- "${OPTS}"
@@ -41,6 +42,7 @@ RUN_DAEMON=0
 RUN_CONSOLE=0
 RUN_VERSION=0
 RUN_BENCHMARK=0
+BENCHMARK_FILTER=""
 
 while true; do
     case "$1" in
@@ -60,6 +62,10 @@ while true; do
         RUN_BENCHMARK=1
         shift
         ;;
+    --benchmark_filter)
+        BENCHMARK_FILTER="$2"
+        shift 2
+        ;;
     --)
         shift
         break
@@ -447,10 +453,16 @@ else
 fi
 
 if [[ "${RUN_BENCHMARK}" -eq 1 ]]; then
+    BENCHMARK_ARGS=()
+
+    if [[ -n ${BENCHMARK_FILTER} ]]; then
+        BENCHMARK_ARGS+=("--benchmark_filter=${BENCHMARK_FILTER}")
+    fi
+
     if [[ "$(uname -s)" == 'Darwin' ]]; then
-        env DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}" ${LIMIT:+${LIMIT}} 
"${DORIS_HOME}/lib/benchmark_test"
+        env DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}" ${LIMIT:+${LIMIT}} 
"${DORIS_HOME}/lib/benchmark_test" "${BENCHMARK_ARGS[@]}"
     else
-        ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test"
+        ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test" 
"${BENCHMARK_ARGS[@]}"
     fi
 elif [[ "${RUN_DAEMON}" -eq 1 ]]; then
     if [[ "$(uname -s)" == 'Darwin' ]]; then


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to