emkornfield commented on code in PR #9628:
URL: https://github.com/apache/arrow-rs/pull/9628#discussion_r3037265797


##########
parquet/src/bloom_filter/mod.rs:
##########
@@ -191,7 +197,55 @@ impl std::ops::IndexMut<usize> for Block {
     }
 }
 
-/// A split block Bloom filter.
+impl std::ops::BitOr for Block {
+    type Output = Self;
+
+    #[inline]
+    fn bitor(self, rhs: Self) -> Self {
+        let mut result = [0u32; 8];
+        for (i, item) in result.iter_mut().enumerate() {
+            *item = self.0[i] | rhs.0[i];
+        }
+        Self(result)
+    }
+}
+
+impl std::ops::BitOrAssign for Block {
+    #[inline]
+    fn bitor_assign(&mut self, rhs: Self) {
+        for i in 0..8 {
+            self.0[i] |= rhs.0[i];
+        }
+    }
+}
+
+impl Block {
+    /// Count the total number of set bits across all 8 words.
+    ///
+    /// Computes popcount on each word separately and sums. Keeping the popcount
+    /// separate from the OR allows the compiler to batch SIMD popcount instructions
+    /// (e.g., `cnt.16b` on ARM NEON) instead of interleaving them with OR operations.
+    #[inline]
+    fn count_ones(self) -> u32 {
+        // Written as a fold over the array so the compiler sees 8 independent
+        // popcount operations it can vectorize into cnt.16b + horizontal sum.
+        self.0.iter().map(|w| w.count_ones()).sum()
+    }
+}
+
+/// A split block Bloom filter (SBBF).
+///
+/// An SBBF partitions its bit space into fixed-size 256-bit (32-byte) blocks, each fitting in a
+/// single CPU cache line. Each block contains eight 32-bit words, aligned with SIMD lanes for

Review Comment:
   nit: do we need the detail about the cache line? On most modern CPUs a cache line is 64 bytes, so two 32-byte blocks actually fit per cache line, which makes this wording a little confusing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to