This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 96637fc8b9 Speed up binary kernels (30% faster `and` and `or`), add
`BooleanBuffer::from_bitwise_binary_op` (#9090)
96637fc8b9 is described below
commit 96637fc8b928a94de53bbec3501337c0ecfbf936
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Jan 9 05:51:52 2026 -0500
Speed up binary kernels (30% faster `and` and `or`), add
`BooleanBuffer::from_bitwise_binary_op` (#9090)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/issues/8806
- Closes https://github.com/apache/arrow-rs/pull/8854
- Closes https://github.com/apache/arrow-rs/pull/8807
This is the next step after
- https://github.com/apache/arrow-rs/pull/8996
# Rationale for this change
- we can help rust / LLVM generate more optimal code by processing u64
words at a time when the buffer is already u64 aligned (see #8807)
Also, it is hard to find the code to create new Buffers by applying
bitwise unary operations.
# What changes are included in this PR?
- Introduce optimized `BooleanBuffer::from_bitwise_binary`
- Migrate several kernels that use `bitwise_bin_op_helper` to use the
new BooleanBuffer
# Are these changes tested?
Yes new tests are added
Performance results show 30% performance improvement for the `and` and
`or` kernels for aligned buffers (common case)
# Are there any user-facing changes?
A new API
---
arrow-arith/src/boolean.rs | 24 +++---
arrow-buffer/src/buffer/boolean.rs | 150 ++++++++++++++++++++++++++++++++++++-
arrow-buffer/src/buffer/ops.rs | 13 ++--
arrow-select/src/nullif.rs | 2 +-
4 files changed, 169 insertions(+), 20 deletions(-)
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index d94df49de2..6bf438e646 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -23,7 +23,7 @@
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
use arrow_array::*;
-use arrow_buffer::buffer::{bitwise_bin_op_helper,
bitwise_quaternary_op_helper};
+use arrow_buffer::buffer::bitwise_quaternary_op_helper;
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
use arrow_schema::ArrowError;
@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray)
-> Result<BooleanAr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is false (because null AND false = false).
- Some(bitwise_bin_op_helper(
+ Some(BooleanBuffer::from_bitwise_binary_op(
left_null_buffer.buffer(),
left_null_buffer.offset(),
right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray)
-> Result<BooleanAr
}
(None, Some(right_null_buffer)) => {
// Same as above
- Some(bitwise_bin_op_helper(
+ Some(BooleanBuffer::from_bitwise_binary_op(
right_null_buffer.buffer(),
right_null_buffer.offset(),
left_values.inner(),
@@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right:
&BooleanArray) -> Result<BooleanAr
// d is right data bits.
// The final null bits are:
// (a | (c & !d)) & (c | (a & !b))
- Some(bitwise_quaternary_op_helper(
+ let buffer = bitwise_quaternary_op_helper(
[
left_null_buffer.buffer(),
left_values.inner(),
@@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right:
&BooleanArray) -> Result<BooleanAr
],
left.len(),
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
- ))
+ );
+ Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};
- let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0,
left.len())));
+ let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values & right_values, nulls))
}
@@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray)
-> Result<BooleanArr
// The final null bit is set only if:
// 1. left null bit is set, or
// 2. right data bit is true (because null OR true = true).
- Some(bitwise_bin_op_helper(
+ Some(BooleanBuffer::from_bitwise_binary_op(
left_nulls.buffer(),
left_nulls.offset(),
right_values.inner(),
@@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray)
-> Result<BooleanArr
}
(None, Some(right_nulls)) => {
// Same as above
- Some(bitwise_bin_op_helper(
+ Some(BooleanBuffer::from_bitwise_binary_op(
right_nulls.buffer(),
right_nulls.offset(),
left_values.inner(),
@@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray)
-> Result<BooleanArr
// d is right data bits.
// The final null bits are:
// (a | (c & d)) & (c | (a & b))
- Some(bitwise_quaternary_op_helper(
+ let buffer = bitwise_quaternary_op_helper(
[
left_nulls.buffer(),
left_values.inner(),
@@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right:
&BooleanArray) -> Result<BooleanArr
],
left.len(),
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
- ))
+ );
+ Some(BooleanBuffer::new(buffer, 0, left.len()))
}
};
- let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0,
left.len())));
+ let nulls = buffer.map(NullBuffer::new);
Ok(BooleanArray::new(left_values | right_values, nulls))
}
diff --git a/arrow-buffer/src/buffer/boolean.rs
b/arrow-buffer/src/buffer/boolean.rs
index ecd7de38c0..f7f1b025ed 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -169,9 +169,10 @@ impl BooleanBuffer {
/// * The output always has zero offset
///
/// # See Also
+ /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer
from a binary operation
/// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for
in-place unary bitwise operations
///
- /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input
[`Buffer`]
+ /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte
slice
/// ```
/// # use arrow_buffer::BooleanBuffer;
/// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
@@ -221,9 +222,8 @@ impl BooleanBuffer {
result.truncate(chunks.num_bytes());
}
- let buffer = Buffer::from(result);
BooleanBuffer {
- buffer,
+ buffer: Buffer::from(result),
bit_offset: 0,
bit_len: len_in_bits,
}
@@ -254,6 +254,112 @@ impl BooleanBuffer {
Some(BooleanBuffer::new(buffer, 0, len_in_bits))
}
+ /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
to
+ /// the relevant bits from two input buffers.
+ ///
+ /// This function is faster than applying the operation bit by bit as
+ /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+ ///
+ /// # Notes:
+ /// See notes on [Self::from_bitwise_unary_op]
+ ///
+ /// # See Also
+ /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a
single input buffer.
+ /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for
in-place binary bitwise operations
+ ///
+ /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two
[`Buffer`]s
+ /// ```
+ /// # use arrow_buffer::{Buffer, BooleanBuffer};
+ /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes
= 16 bits
+ /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8,
0b11110000u8]); // 3 bytes = 24 bits
+ /// // AND of the first 12 bits
+ /// let result = BooleanBuffer::from_bitwise_binary_op(
+ /// &left, 0, &right, 0, 12, |a, b| a & b
+ /// );
+ /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+ /// ```
+ ///
+ /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte
slices
+ /// ```
+ /// # use arrow_buffer::BooleanBuffer;
+ /// let left = [0b11001100u8, 0b10111010u8];
+ /// let right = [0b10101010u8, 0b11011100u8];
+ /// // OR of bits 4..16 from left and bits 0..12 from right
+ /// let result = BooleanBuffer::from_bitwise_binary_op(
+ /// &left, 4, &right, 0, 12, |a, b| a | b
+ /// );
+ /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+ /// ```
+ pub fn from_bitwise_binary_op<F>(
+ left: impl AsRef<[u8]>,
+ left_offset_in_bits: usize,
+ right: impl AsRef<[u8]>,
+ right_offset_in_bits: usize,
+ len_in_bits: usize,
+ mut op: F,
+ ) -> Self
+ where
+ F: FnMut(u64, u64) -> u64,
+ {
+ let left = left.as_ref();
+ let right = right.as_ref();
+ // try fast path for aligned input
+ // If the underlying buffers are aligned to u64 we can apply the
operation directly on the u64 slices
+ // to improve performance.
+ if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
+ // align to byte boundary
+ let left = &left[left_offset_in_bits / 8..];
+ let right = &right[right_offset_in_bits / 8..];
+
+ unsafe {
+ let (left_prefix, left_u64s, left_suffix) =
left.align_to::<u64>();
+ let (right_prefix, right_u64s, right_suffix) =
right.align_to::<u64>();
+ // if there is no prefix or suffix, both buffers are aligned
and
+ // we can do the operation directly on u64s.
+ // TODO: consider `slice::as_chunks` and `u64::from_le_bytes`
when MSRV reaches 1.88.
+ //
https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
+ if left_prefix.is_empty()
+ && right_prefix.is_empty()
+ && left_suffix.is_empty()
+ && right_suffix.is_empty()
+ {
+ let result_u64s = left_u64s
+ .iter()
+ .zip(right_u64s.iter())
+ .map(|(l, r)| op(*l, *r))
+ .collect::<Vec<u64>>();
+ return BooleanBuffer {
+ buffer: Buffer::from(result_u64s),
+ bit_offset: 0,
+ bit_len: len_in_bits,
+ };
+ }
+ }
+ }
+ let left_chunks = BitChunks::new(left, left_offset_in_bits,
len_in_bits);
+ let right_chunks = BitChunks::new(right, right_offset_in_bits,
len_in_bits);
+
+ let chunks = left_chunks
+ .iter()
+ .zip(right_chunks.iter())
+ .map(|(left, right)| op(left, right));
+ // Soundness: `BitChunks` is a `BitChunks` trusted length iterator
which
+ // correctly reports its upper bound
+ let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks)
};
+
+ let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
+ let rem = op(left_chunks.remainder_bits(),
right_chunks.remainder_bits());
+ // we are counting its starting from the least significant bit, to
to_le_bytes should be correct
+ let rem = &rem.to_le_bytes()[0..remainder_bytes];
+ buffer.extend_from_slice(rem);
+
+ BooleanBuffer {
+ buffer: Buffer::from(buffer),
+ bit_offset: 0,
+ bit_len: len_in_bits,
+ }
+ }
+
/// Returns the number of set bits in this buffer
pub fn count_set_bits(&self) -> usize {
self.buffer
@@ -656,4 +762,42 @@ mod tests {
assert_eq!(result, expected);
}
}
+
+ #[test]
+ fn test_from_bitwise_binary_op() {
+ // pick random boolean inputs
+ let input_bools_left = (0..1024)
+ .map(|_| rand::random::<bool>())
+ .collect::<Vec<bool>>();
+ let input_bools_right = (0..1024)
+ .map(|_| rand::random::<bool>())
+ .collect::<Vec<bool>>();
+ let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
+ let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
+
+ for left_offset in 0..200 {
+ for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
+ for len_offset in [0, 1, 44, 100, 256, 300, 512] {
+ let len = 1024 - len_offset -
left_offset.max(right_offset); // ensure we don't go out of bounds
+ // compute with AND
+ let result = BooleanBuffer::from_bitwise_binary_op(
+ input_buffer_left.values(),
+ left_offset,
+ input_buffer_right.values(),
+ right_offset,
+ len,
+ |a, b| a & b,
+ );
+ // compute directly from bools
+ let expected = input_bools_left[left_offset..]
+ .iter()
+ .zip(&input_bools_right[right_offset..])
+ .take(len)
+ .map(|(a, b)| *a & *b)
+ .collect::<BooleanBuffer>();
+ assert_eq!(result, expected);
+ }
+ }
+ }
+ }
}
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index cb0925bb2c..36efe87643 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -150,7 +150,7 @@ pub fn buffer_bin_and(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- bitwise_bin_op_helper(
+ BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
@@ -158,6 +158,7 @@ pub fn buffer_bin_and(
len_in_bits,
|a, b| a & b,
)
+ .into_inner()
}
/// Apply a bitwise or to two inputs and return the result as a Buffer.
@@ -169,7 +170,7 @@ pub fn buffer_bin_or(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- bitwise_bin_op_helper(
+ BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
@@ -177,6 +178,7 @@ pub fn buffer_bin_or(
len_in_bits,
|a, b| a | b,
)
+ .into_inner()
}
/// Apply a bitwise xor to two inputs and return the result as a Buffer.
@@ -188,7 +190,7 @@ pub fn buffer_bin_xor(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- bitwise_bin_op_helper(
+ BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
@@ -196,6 +198,7 @@ pub fn buffer_bin_xor(
len_in_bits,
|a, b| a ^ b,
)
+ .into_inner()
}
/// Apply a bitwise and_not to two inputs and return the result as a Buffer.
@@ -207,7 +210,7 @@ pub fn buffer_bin_and_not(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- bitwise_bin_op_helper(
+ BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
@@ -215,11 +218,11 @@ pub fn buffer_bin_and_not(
len_in_bits,
|a, b| a & !b,
)
+ .into_inner()
}
/// Apply a bitwise not to one input and return the result as a Buffer.
/// The input is treated as a bitmap, meaning that offset and length are
specified in number of bits.
pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits:
usize) -> Buffer {
- // TODO: should we deprecate this function in favor of the Buffer ! impl ?
BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits,
|a| !a).into_inner()
}
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index e51016f9ba..fa875c20e3 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -23,7 +23,7 @@ use arrow_buffer::{BooleanBuffer, NullBuffer,
bitwise_unary_op_helper};
use arrow_schema::{ArrowError, DataType};
/// Returns a new array with the same values and the validity bit to false
where
-/// the corresponding element of`right` is true.
+/// the corresponding element of `right` is true.
///
/// This can be used to implement SQL `NULLIF`
///