This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 814ee4227c Add benchmarks for Utf8View scalars for zip (#8988)
814ee4227c is described below

commit 814ee4227c01fce478bdd3594dd156250286b46e
Author: Michael Kleen <[email protected]>
AuthorDate: Sun Dec 28 13:49:15 2025 +0100

    Add benchmarks for Utf8View scalars for zip (#8988)
    
    # Which issue does this PR close?
    
    N/A
    
    # Rationale for this change
    
    I have a PR to improve zip perf for Utf8View/BinaryView scalars and I
    need benchmarks for that.
    
    - https://github.com/apache/arrow-rs/pull/8963
    
    # What changes are included in this PR?
    
    This extends the zip benchmarks with one new input generator for
    StringViews and two more benchmark cases that exercise scalar
    combinations over different StringView length ranges.
    
    # Are these changes tested?
    
    N/A
    
    # Are there any user-facing changes?
    
    No
---
 arrow/benches/zip_kernels.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++
 arrow/src/util/bench_util.rs | 27 +++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs
index 31cbca6397..65f6bb280f 100644
--- a/arrow/benches/zip_kernels.rs
+++ b/arrow/benches/zip_kernels.rs
@@ -21,6 +21,7 @@ use rand::distr::{Distribution, StandardUniform};
 use rand::prelude::StdRng;
 use rand::{Rng, SeedableRng};
 use std::hint;
+use std::ops::Range;
 use std::sync::Arc;
 
 use arrow::array::*;
@@ -133,6 +134,35 @@ where
     }
 }
 
+struct GenerateStringView {
+    range: Range<usize>,
+    description: String,
+    _marker: std::marker::PhantomData<StringViewType>,
+}
+
+impl InputGenerator for GenerateStringView {
+    fn name(&self) -> &str {
+        self.description.as_str()
+    }
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&DataType::Utf8View, 1)
+    }
+
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        let array = self.generate_array(seed, number_of_scalars, 0.0);
+        (0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
+    }
+
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        Arc::new(create_string_view_array_with_len_range_and_seed(
+            array_length,
+            null_percentage,
+            self.range.clone(),
+            seed,
+        ))
+    }
+}
+
 fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
     vec![
         ("all_true", create_boolean_array(len, 0.0, 1.0)),
@@ -273,6 +303,24 @@ fn add_benchmark(c: &mut Criterion) {
             _marker: std::marker::PhantomData,
         },
     );
+
+    bench_zip_on_input_generator(
+        c,
+        &GenerateStringView {
+            description: "string_views size (3..10)".to_string(),
+            range: 3..10,
+            _marker: std::marker::PhantomData,
+        },
+    );
+
+    bench_zip_on_input_generator(
+        c,
+        &GenerateStringView {
+            description: "string_views size (10..100)".to_string(),
+            range: 10..100,
+            _marker: std::marker::PhantomData,
+        },
+    );
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index 9f83a50f4f..1f1dcff9b6 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -208,6 +208,33 @@ pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSize
         })
         .collect()
 }
+/// Creates a string view array of a given range, null density and length
+///
+/// Arguments:
+/// - `size`: number of elements in the string view array
+/// - `null_density`: density of nulls in the string view array
+/// - `range`: range of lengths from which each string's length is drawn
+/// - `seed`: seed for the random number generator
+pub fn create_string_view_array_with_len_range_and_seed(
+    size: usize,
+    null_density: f32,
+    range: Range<usize>,
+    seed: u64,
+) -> StringViewArray {
+    let rng = &mut StdRng::seed_from_u64(seed);
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                let str_len = rng.random_range(range.clone());
+                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
+                let value = String::from_utf8(value).unwrap();
+                Some(value)
+            }
+        })
+        .collect()
+}
 
 fn create_string_view_array_with_len_range_and_prefix(
     size: usize,

Reply via email to