This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6d01dd9faf Perf: Optimize comparison kernels for inlined views (#7731)
6d01dd9faf is described below

commit 6d01dd9faf99683ecac5e58f9a1a281ac025efa8
Author: Qi Zhu <[email protected]>
AuthorDate: Mon Jun 23 20:37:05 2025 +0800

    Perf: Optimize comparison kernels for inlined views (#7731)
    
    # Which issue does this PR close?
    
    Add fast path for empty buffer case to skip compute len, and use
    view(u128) to compare directly.
    
    Closes [#7621](https://github.com/apache/arrow-rs/issues/7621)
    
    # Rationale for this change
    
    Add fast path for empty buffer case to skip compute len, and use
    view(u128) to compare directly.
    
    
    # What changes are included in this PR?
    
    Add fast path for empty buffer case to skip compute len, and use
    view(u128) to compare directly.
    
    
    # Are there any user-facing changes?
    No
    
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
---
 arrow-ord/src/cmp.rs                | 33 +++++++++++++++++++++++++++++----
 arrow/benches/comparison_kernels.rs | 28 ++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs
index 2727ff9961..46cab1bb8e 100644
--- a/arrow-ord/src/cmp.rs
+++ b/arrow-ord/src/cmp.rs
@@ -565,13 +565,16 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a 
GenericByteViewArray<T> {
     /// Item.0 is the array, Item.1 is the index
     type Item = (&'a GenericByteViewArray<T>, usize);
 
+    #[inline(always)]
     fn is_eq(l: Self::Item, r: Self::Item) -> bool {
-        // # Safety
-        // The index is within bounds as it is checked in value()
         let l_view = unsafe { l.0.views().get_unchecked(l.1) };
-        let l_len = *l_view as u32;
-
         let r_view = unsafe { r.0.views().get_unchecked(r.1) };
+        if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
+            // For eq case, we can directly compare the inlined bytes
+            return l_view.cmp(r_view).is_eq();
+        }
+
+        let l_len = *l_view as u32;
         let r_len = *r_view as u32;
         // This is a fast path for equality check.
         // We don't need to look at the actual bytes to determine if they are 
equal.
@@ -579,10 +582,22 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a 
GenericByteViewArray<T> {
             return false;
         }
 
+        // # Safety
+        // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, 
r.1).is_eq() }
     }
 
+    #[inline(always)]
     fn is_lt(l: Self::Item, r: Self::Item) -> bool {
+        if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
+            let l_view = unsafe { l.0.views().get_unchecked(l.1) };
+            let r_view = unsafe { r.0.views().get_unchecked(r.1) };
+            let l_len = *l_view as u32 as usize;
+            let r_len = *r_view as u32 as usize;
+            let l_bytes = unsafe { 
GenericByteViewArray::<T>::inline_value(l_view, l_len) };
+            let r_bytes = unsafe { 
GenericByteViewArray::<T>::inline_value(r_view, r_len) };
+            return l_bytes.cmp(r_bytes).is_lt();
+        }
         // # Safety
         // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, 
r.1).is_lt() }
@@ -618,6 +633,7 @@ impl<'a> ArrayOrd for &'a FixedSizeBinaryArray {
 }
 
 /// Compares two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
+#[inline(always)]
 pub fn compare_byte_view<T: ByteViewType>(
     left: &GenericByteViewArray<T>,
     left_idx: usize,
@@ -626,6 +642,15 @@ pub fn compare_byte_view<T: ByteViewType>(
 ) -> std::cmp::Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
+    if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
+        let l_view = unsafe { left.views().get_unchecked(left_idx) };
+        let r_view = unsafe { right.views().get_unchecked(right_idx) };
+        let l_len = *l_view as u32 as usize;
+        let r_len = *r_view as u32 as usize;
+        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 
l_len) };
+        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 
r_len) };
+        return l_bytes.cmp(r_bytes);
+    }
     unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, 
right_idx) }
 }
 
diff --git a/arrow/benches/comparison_kernels.rs 
b/arrow/benches/comparison_kernels.rs
index c29200d800..6a02deb41a 100644
--- a/arrow/benches/comparison_kernels.rs
+++ b/arrow/benches/comparison_kernels.rs
@@ -69,6 +69,17 @@ fn make_string_array(size: usize, rng: &mut StdRng) -> impl 
Iterator<Item = Opti
     })
 }
 
+fn make_inlined_string_array(
+    size: usize,
+    rng: &mut StdRng,
+) -> impl Iterator<Item = Option<String>> + '_ {
+    (0..size).map(|_| {
+        let len = rng.random_range(0..12);
+        let bytes = (0..len).map(|_| rng.random_range(0..128)).collect();
+        Some(String::from_utf8(bytes).unwrap())
+    })
+}
+
 fn add_benchmark(c: &mut Criterion) {
     let arr_a = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 42);
     let arr_b = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 43);
@@ -226,6 +237,23 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| eq(&string_view_left, &string_view_right).unwrap())
     });
 
+    let array_gen = make_inlined_string_array(1024 * 1024 * 8, &mut rng);
+    let string_left = StringArray::from_iter(array_gen);
+    let string_view_inlined_left = 
StringViewArray::from_iter(string_left.iter());
+
+    let array_gen = make_inlined_string_array(1024 * 1024 * 8, &mut rng);
+    let string_right = StringArray::from_iter(array_gen);
+    let string_view_inlined_right = 
StringViewArray::from_iter(string_right.iter());
+
+    // Add fast path benchmarks for StringViewArray, both side are inlined 
views < 12 bytes
+    c.bench_function("eq StringViewArray StringViewArray inlined bytes", |b| {
+        b.iter(|| eq(&string_view_inlined_left, 
&string_view_inlined_right).unwrap())
+    });
+
+    c.bench_function("lt StringViewArray StringViewArray inlined bytes", |b| {
+        b.iter(|| lt(&string_view_inlined_left, 
&string_view_inlined_right).unwrap())
+    });
+
     // eq benchmarks for long strings with the same prefix
     c.bench_function("eq long same prefix strings StringArray", |b| {
         b.iter(|| eq(&left_arr_long_string, &right_arr_long_string).unwrap())

Reply via email to