This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c1440cb9d68 [opt](hll) Use auto vectorization for HLL registers merge function (#54539)
c1440cb9d68 is described below
commit c1440cb9d68bcc4f9555b8b85f2eb044552e51ec
Author: Zhiguo Wu <[email protected]>
AuthorDate: Sun Aug 24 00:38:19 2025 +0800
[opt](hll) Use auto vectorization for HLL registers merge function (#54539)
Use auto vectorization to optimize HLL registers merge.
---
be/benchmark/benchmark_hll_merge.hpp | 60 ++++++++++++++++++++++++++++++++++++
be/benchmark/benchmark_main.cpp | 1 +
be/src/olap/hll.h | 24 +++------------
bin/start_be.sh | 16 ++++++++--
4 files changed, 80 insertions(+), 21 deletions(-)
diff --git a/be/benchmark/benchmark_hll_merge.hpp b/be/benchmark/benchmark_hll_merge.hpp
new file mode 100644
index 00000000000..d923d208fe4
--- /dev/null
+++ b/be/benchmark/benchmark_hll_merge.hpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+
+#include "olap/hll.h"
+#include "util/hash_util.hpp"
+
+namespace doris {
+ static uint64_t hash(uint64_t value) {
+ return HashUtil::murmur_hash64A(&value, 8, 0);
+ }
+
+ static std::pair<HyperLogLog, HyperLogLog> prepare_test_data() {
+ HyperLogLog hll1, hll2;
+ for (int i = 0; i < 64 * 1024; ++i) {
+ hll1.update(hash(i));
+ hll2.update(hash(i + 1));
+ }
+ return {std::move(hll1), std::move(hll2)};
+ }
+} // namespace doris
+
+static auto [hll1, hll2] = doris::prepare_test_data();
+
+static void BM_HllMerge(benchmark::State& state) {
+ for (auto _ : state) {
+ doris::HyperLogLog copy1(hll1);
+ doris::HyperLogLog copy2(hll2);
+ copy1.merge(copy2);
+
+ benchmark::DoNotOptimize(copy1);
+ }
+}
+
+BENCHMARK(BM_HllMerge)
+ ->Unit(benchmark::kNanosecond)
+ ->Repetitions(5)
+ ->DisplayAggregatesOnly()
+ ->ComputeStatistics("min",
+ [](const std::vector<double>& v) -> double {
+ return *std::min_element(v.begin(), v.end());
+ })
+ ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+ return *std::max_element(v.begin(), v.end());
+ });
diff --git a/be/benchmark/benchmark_main.cpp b/be/benchmark/benchmark_main.cpp
index dbcce3231c4..950c55b5883 100644
--- a/be/benchmark/benchmark_main.cpp
+++ b/be/benchmark/benchmark_main.cpp
@@ -19,6 +19,7 @@
#include "benchmark_bit_pack.hpp"
#include "benchmark_fastunion.hpp"
+#include "benchmark_hll_merge.hpp"
#include "binary_cast_benchmark.hpp"
#include "vec/columns/column_string.h"
#include "vec/core/block.h"
diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h
index 776289c7588..2b004591c70 100644
--- a/be/src/olap/hll.h
+++ b/be/src/olap/hll.h
@@ -23,10 +23,6 @@
#include <string>
#include <utility>
-#ifdef __x86_64__
-#include <immintrin.h>
-#endif
-
#include "vec/common/hash_table/phmap_fwd_decl.h"
namespace doris {
@@ -276,23 +272,13 @@ private:
// absorb other registers into this registers
void _merge_registers(const uint8_t* other_registers) {
-#ifdef __AVX2__
- int loop = HLL_REGISTERS_COUNT / 32; // 32 = 256/8
- uint8_t* dst = _registers;
- const uint8_t* src = other_registers;
- for (int i = 0; i < loop; i++) {
- __m256i xa = _mm256_loadu_si256((const __m256i*)dst);
- __m256i xb = _mm256_loadu_si256((const __m256i*)src);
- _mm256_storeu_si256((__m256i*)dst, _mm256_max_epu8(xa, xb));
- src += 32;
- dst += 32;
- }
-#else
+ _do_simd_merge(_registers, other_registers);
+ }
+
+ void _do_simd_merge(uint8_t* __restrict registers, const uint8_t* __restrict other_registers) {
for (int i = 0; i < HLL_REGISTERS_COUNT; ++i) {
- _registers[i] =
- (_registers[i] < other_registers[i] ? other_registers[i] : _registers[i]);
+ registers[i] = (registers[i] < other_registers[i] ? other_registers[i] : registers[i]);
}
-#endif
}
HllDataType _type = HLL_DATA_EMPTY;
diff --git a/bin/start_be.sh b/bin/start_be.sh
index f2709cb5a25..ebfb45de300 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -33,6 +33,7 @@ OPTS="$(getopt \
-l 'console' \
-l 'version' \
-l 'benchmark' \
+ -l 'benchmark_filter:' \
-- "$@")"
eval set -- "${OPTS}"
@@ -41,6 +42,7 @@ RUN_DAEMON=0
RUN_CONSOLE=0
RUN_VERSION=0
RUN_BENCHMARK=0
+BENCHMARK_FILTER=""
while true; do
case "$1" in
@@ -60,6 +62,10 @@ while true; do
RUN_BENCHMARK=1
shift
;;
+ --benchmark_filter)
+ BENCHMARK_FILTER="$2"
+ shift 2
+ ;;
--)
shift
break
@@ -447,10 +453,16 @@ else
fi
if [[ "${RUN_BENCHMARK}" -eq 1 ]]; then
+ BENCHMARK_ARGS=()
+
+ if [[ -n ${BENCHMARK_FILTER} ]]; then
+ BENCHMARK_ARGS+=("--benchmark_filter=${BENCHMARK_FILTER}")
+ fi
+
if [[ "$(uname -s)" == 'Darwin' ]]; then
- env DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}" ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test"
+ env DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}" ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test" "${BENCHMARK_ARGS[@]}"
else
- ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test"
+ ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/benchmark_test" "${BENCHMARK_ARGS[@]}"
fi
elif [[ "${RUN_DAEMON}" -eq 1 ]]; then
if [[ "$(uname -s)" == 'Darwin' ]]; then
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]