This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 57f96f24a1 Added number to string benches for json_writer (#7864)
57f96f24a1 is described below

commit 57f96f24a1f69504588ecb2c7c42fb91c0592262
Author: Mark Nash <[email protected]>
AuthorDate: Sat Jul 5 04:33:55 2025 -0700

    Added number to string benches for json_writer (#7864)
    
    # Which issue does this PR close?
    
    - Closes: None
    
    # Rationale for this change
    
    It is suggested to merge benches before merging a speed optimization
    (see #7819)
    
    # What changes are included in this PR?
    
    adding the following benches to convert the following type arrays to a
    string
    
    - i64
    - i32
    - f64
    - f32
    - i64, i32, f64, f32
    
    # Are these changes tested?
    
    I am not sure we are testing benches
    
    # Are there any user-facing changes?
    
    No
---
 arrow/benches/json_writer.rs | 140 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 139 insertions(+), 1 deletion(-)

diff --git a/arrow/benches/json_writer.rs b/arrow/benches/json_writer.rs
index ff76ecdd62..c636c076ec 100644
--- a/arrow/benches/json_writer.rs
+++ b/arrow/benches/json_writer.rs
@@ -25,8 +25,9 @@ use arrow::util::bench_util::{
 use arrow::util::test_util::seedable_rng;
 use arrow_array::{Array, ListArray, RecordBatch, StructArray};
 use arrow_buffer::{BooleanBuffer, NullBuffer, OffsetBuffer};
-use arrow_json::LineDelimitedWriter;
+use arrow_json::{LineDelimitedWriter, ReaderBuilder};
 use rand::Rng;
+use serde::Serialize;
 use std::sync::Arc;
 
 const NUM_ROWS: usize = 65536;
@@ -181,6 +182,138 @@ fn bench_struct_list(c: &mut Criterion) {
     do_bench(c, "bench_struct_list", &batch)
 }
 
+fn do_number_to_string_bench<S: Serialize>(
+    name: &str,
+    c: &mut Criterion,
+    schema: Arc<Schema>,
+    rows: Vec<S>,
+) {
+    c.bench_function(name, |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_coerce_primitive(true) // important for coercion
+                .build_decoder()
+                .expect("Failed to build decoder");
+
+            decoder.serialize(&rows).expect("Failed to serialize rows");
+
+            decoder
+                .flush()
+                .expect("Failed to flush")
+                .expect("No RecordBatch produced");
+        })
+    });
+}
+
+fn bench_i64_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: i64,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, 
false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Int64Type>(NUM_ROWS, 
0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("i64_to_string", c, schema, rows)
+}
+
+fn bench_i32_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: i32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, 
false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Int32Type>(NUM_ROWS, 
0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("i32_to_string", c, schema, rows)
+}
+
+fn bench_f32_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: f32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, 
false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Float32Type>(NUM_ROWS, 
0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("f32_to_string", c, schema, rows)
+}
+
+fn bench_f64_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: f64,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, 
false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Float64Type>(NUM_ROWS, 
0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("f64_to_string", c, schema, rows)
+}
+
+fn bench_mixed_numbers_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val1: f64,
+        val2: f32,
+        val3: i64,
+        val4: i32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("val1", DataType::Utf8, false),
+        Field::new("val2", DataType::Utf8, false),
+        Field::new("val3", DataType::Utf8, false),
+        Field::new("val4", DataType::Utf8, false),
+    ]));
+
+    let f64_array = create_primitive_array::<Float64Type>(NUM_ROWS, 0.0);
+    let f32_array = create_primitive_array::<Float32Type>(NUM_ROWS, 0.0);
+    let i64_array = create_primitive_array::<Int64Type>(NUM_ROWS, 0.0);
+    let i32_array = create_primitive_array::<Int32Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val1: f64_array.value(i),
+            val2: f32_array.value(i),
+            val3: i64_array.value(i),
+            val4: i32_array.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("mixed_numbers_to_string", c, schema, rows)
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     bench_integer(c);
     bench_float(c);
@@ -192,6 +325,11 @@ fn criterion_benchmark(c: &mut Criterion) {
     bench_list(c);
     bench_nullable_list(c);
     bench_struct_list(c);
+    bench_f64_to_string(c);
+    bench_f32_to_string(c);
+    bench_i64_to_string(c);
+    bench_i32_to_string(c);
+    bench_mixed_numbers_to_string(c);
 }
 
 criterion_group!(benches, criterion_benchmark);

Reply via email to