This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 8867a826bca [opt](arm) Optimize the BlockBloomFilter::bucket_find on 
ARM platform… (#43508)
8867a826bca is described below

commit 8867a826bca6982e6fc88672409212b704ab75d7
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Sun Nov 10 18:23:58 2024 +0800

    [opt](arm) Optimize the BlockBloomFilter::bucket_find on ARM platform… 
(#43508)
    
    …s using NEON instructions. (#38888)
    https://github.com/apache/doris/pull/38888
    ## Proposed changes
    
    ```
    --------------------------------------------------------------
    Benchmark                    Time             CPU   Iterations
    --------------------------------------------------------------
    BM_BucketFindNeon         8.14 ns         8.14 ns    344002441
    BM_BucketFindNative       17.5 ns         17.5 ns    160152491
    ```
---
 be/src/exprs/block_bloom_filter.hpp     | 36 ++++++++++++++++++++++++++++++++-
 be/src/exprs/block_bloom_filter_impl.cc | 29 +++++++++++++++++++++++---
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/be/src/exprs/block_bloom_filter.hpp 
b/be/src/exprs/block_bloom_filter.hpp
index f31d7f7d4c0..b7d488a3003 100644
--- a/be/src/exprs/block_bloom_filter.hpp
+++ b/be/src/exprs/block_bloom_filter.hpp
@@ -124,6 +124,39 @@ public:
         return false;
     }
 
+#ifdef __ARM_NEON
+    void make_find_mask(uint32_t key, uint32x4_t* masks) const noexcept {
+        uint32x4_t hash_data_1 = vdupq_n_u32(key);
+        uint32x4_t hash_data_2 = vdupq_n_u32(key);
+
+        uint32x4_t rehash_1 = vld1q_u32(&kRehash[0]);
+        uint32x4_t rehash_2 = vld1q_u32(&kRehash[4]);
+
+        //  masks[i] = key * kRehash[i];
+        hash_data_1 = vmulq_u32(rehash_1, hash_data_1);
+        hash_data_2 = vmulq_u32(rehash_2, hash_data_2);
+        //  masks[i] = masks[i] >> shift_num;
+        hash_data_1 = vshrq_n_u32(hash_data_1, shift_num);
+        hash_data_2 = vshrq_n_u32(hash_data_2, shift_num);
+
+        const uint32x4_t ones = vdupq_n_u32(1);
+
+        // masks[i] = 0x1 << masks[i];
+        masks[0] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_1));
+        masks[1] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_2));
+    }
+#else
+    void make_find_mask(uint32_t key, uint32_t* masks) const noexcept {
+        for (int i = 0; i < kBucketWords; ++i) {
+            masks[i] = key * kRehash[i];
+
+            masks[i] = masks[i] >> shift_num;
+
+            masks[i] = 0x1 << masks[i];
+        }
+    }
+#endif
+
     // Computes the logical OR of this filter with 'other' and stores the 
result in this
     // filter.
     // Notes:
@@ -163,7 +196,8 @@ private:
     // log2(number of bits in a BucketWord)
     static constexpr int kLogBucketWordBits = 5;
     static constexpr BucketWord kBucketWordMask = (1 << kLogBucketWordBits) - 
1;
-
+    // (>> 27) is equivalent to (mod 32)
+    static constexpr auto shift_num = ((1 << kLogBucketWordBits) - 
kLogBucketWordBits);
     // log2(number of bytes in a bucket)
     static constexpr int kLogBucketByteSize = 5;
     // Bucket size in bytes.
diff --git a/be/src/exprs/block_bloom_filter_impl.cc 
b/be/src/exprs/block_bloom_filter_impl.cc
index d285edcb310..e89b9142266 100644
--- a/be/src/exprs/block_bloom_filter_impl.cc
+++ b/be/src/exprs/block_bloom_filter_impl.cc
@@ -138,14 +138,37 @@ void BlockBloomFilter::bucket_insert(const uint32_t 
bucket_idx, const uint32_t h
 }
 
 bool BlockBloomFilter::bucket_find(const uint32_t bucket_idx, const uint32_t 
hash) const noexcept {
+#if defined(__ARM_NEON)
+    uint32x4_t masks[2];
+
+    uint32x4_t directory_1 = vld1q_u32(&_directory[bucket_idx][0]);
+    uint32x4_t directory_2 = vld1q_u32(&_directory[bucket_idx][4]);
+
+    make_find_mask(hash, masks);
+    // The condition for returning true is that all the bits in 
_directory[bucket_idx][i] specified by masks[i] are 1.
+    // This can be equivalently expressed as all the bits in not( 
_directory[bucket_idx][i]) specified by masks[i] are 0.
+    // vbicq_u32(vec1, vec2) : Result of (vec1 AND NOT vec2)
+    // If true is returned, out_1 and out_2 should be all zeros.
+    uint32x4_t out_1 = vbicq_u32(masks[0], directory_1);
+    uint32x4_t out_2 = vbicq_u32(masks[1], directory_2);
+
+    out_1 = vorrq_u32(out_1, out_2);
+
+    uint32x2_t low = vget_low_u32(out_1);
+    uint32x2_t high = vget_high_u32(out_1);
+    low = vorr_u32(low, high);
+    uint32_t res = vget_lane_u32(low, 0) | vget_lane_u32(low, 1);
+    return !(res);
+#else
+    uint32_t masks[kBucketWords];
+    make_find_mask(hash, masks);
     for (int i = 0; i < kBucketWords; ++i) {
-        BucketWord hval = (kRehash[i] * hash) >> ((1 << kLogBucketWordBits) - 
kLogBucketWordBits);
-        hval = 1U << hval;
-        if (!(DCHECK_NOTNULL(_directory)[bucket_idx][i] & hval)) {
+        if ((DCHECK_NOTNULL(_directory)[bucket_idx][i] & masks[i]) == 0) {
             return false;
         }
     }
     return true;
+#endif
 }
 
 void BlockBloomFilter::insert_no_avx2(const uint32_t hash) noexcept {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to