This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 200fb06189 Avoid overallocating arrays in coalesce primitives / views 
(#9132)
200fb06189 is described below

commit 200fb061896525b81a0a13bff050cc621c1621cf
Author: Daniël Heres <[email protected]>
AuthorDate: Sat Jan 10 18:16:53 2026 +0100

    Avoid overallocating arrays in coalesce primitives / views (#9132)
    
    # Which issue does this PR close?
    
    - Closes #9135
    
    # Rationale for this change
    The code was calling `.reserve(batch_size)` which reserves space to at
    least `batch_size` additional elements
    (https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve).
    
    This also improves performance a bit:
    
    ```
    filter: primitive, 8192, nulls: 0, selectivity: 0.001
                            time:   [59.509 ms 59.660 ms 59.856 ms]
                            change: [−3.0781% −2.7917% −2.4795%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 1 outliers among 100 measurements (1.00%)
      1 (1.00%) high severe
    
    filter: primitive, 8192, nulls: 0, selectivity: 0.01
                            time:   [6.0072 ms 6.0226 ms 6.0428 ms]
                            change: [−8.7042% −7.1161% −6.0455%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 4 outliers among 100 measurements (4.00%)
      2 (2.00%) high mild
      2 (2.00%) high severe
    
    Benchmarking filter: primitive, 8192, nulls: 0, selectivity: 0.1: Warming 
up for 3.0000 s
    Warning: Unable to complete 100 samples in 5.0s. You may wish to increase 
target time to 9.5s, enable flat sampling, or reduce sample count to 50.
    filter: primitive, 8192, nulls: 0, selectivity: 0.1
                            time:   [1.8664 ms 1.8709 ms 1.8772 ms]
                            change: [−15.187% −14.905% −14.632%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 11 outliers among 100 measurements (11.00%)
      5 (5.00%) high mild
      6 (6.00%) high severe
    
    filter: primitive, 8192, nulls: 0, selectivity: 0.8
                            time:   [2.5191 ms 2.5444 ms 2.5717 ms]
                            change: [−13.064% −11.414% −10.003%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 2 outliers among 100 measurements (2.00%)
      1 (1.00%) low mild
      1 (1.00%) high severe
    
    Benchmarking filter: primitive, 8192, nulls: 0.1, selectivity: 0.001: 
Warming up for 3.0000 s
    Warning: Unable to complete 100 samples in 5.0s. You may wish to increase 
target time to 7.7s, or reduce sample count to 60.
    filter: primitive, 8192, nulls: 0.1, selectivity: 0.001
                            time:   [76.422 ms 76.671 ms 76.973 ms]
                            change: [−5.5096% −4.0229% −2.8048%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 2 outliers among 100 measurements (2.00%)
      1 (1.00%) high mild
      1 (1.00%) high severe
    
    filter: primitive, 8192, nulls: 0.1, selectivity: 0.01
                            time:   [10.197 ms 10.228 ms 10.262 ms]
                            change: [−3.6627% −3.0569% −2.4919%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 1 outliers among 100 measurements (1.00%)
      1 (1.00%) high mild
    
    filter: primitive, 8192, nulls: 0.1, selectivity: 0.1
                            time:   [4.6635 ms 4.6750 ms 4.6915 ms]
                            change: [−9.4939% −8.5908% −7.8383%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 7 outliers among 100 measurements (7.00%)
      5 (5.00%) high mild
      2 (2.00%) high severe
    
    filter: primitive, 8192, nulls: 0.1, selectivity: 0.8
                            time:   [4.7777 ms 4.8115 ms 4.8467 ms]
                            change: [−9.9226% −9.1384% −8.3813%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 1 outliers among 100 measurements (1.00%)
      1 (1.00%) high mild
    ```
    
    # What changes are included in this PR?
    Changes it to use `self.views.reserve(self.batch_size -
    self.views.len())` to avoid allocating more than necessary (i.e. 2x the
    amount).
    
    # Are these changes tested?
    
    Existing tests
    
    # Are there any user-facing changes?
---
 arrow-select/src/coalesce.rs           | 6 ++++++
 arrow-select/src/coalesce/byte_view.rs | 5 ++++-
 arrow-select/src/coalesce/primitive.rs | 5 ++++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arrow-select/src/coalesce.rs b/arrow-select/src/coalesce.rs
index ddb1c41c8c..8cf1f024c3 100644
--- a/arrow-select/src/coalesce.rs
+++ b/arrow-select/src/coalesce.rs
@@ -615,6 +615,7 @@ mod tests {
     use crate::concat::concat_batches;
     use arrow_array::builder::StringViewBuilder;
     use arrow_array::cast::AsArray;
+    use arrow_array::types::Int32Type;
     use arrow_array::{
         BinaryViewArray, Int32Array, Int64Array, RecordBatchOptions, 
StringArray, StringViewArray,
         TimestampNanosecondArray, UInt32Array, UInt64Array,
@@ -1625,6 +1626,11 @@ mod tests {
         // Now should have a completed batch (100 rows total)
         assert!(coalescer.has_completed_batch());
         let output_batch = coalescer.next_completed_batch().unwrap();
+        let size = output_batch
+            .column(0)
+            .as_primitive::<Int32Type>()
+            .get_buffer_memory_size();
+        assert_eq!(size, 400); // 100 rows * 4 bytes each
         assert_eq!(output_batch.num_rows(), 100);
 
         assert_eq!(coalescer.get_buffered_rows(), 0);
diff --git a/arrow-select/src/coalesce/byte_view.rs 
b/arrow-select/src/coalesce/byte_view.rs
index 6d3bcc8ae0..bca811fff1 100644
--- a/arrow-select/src/coalesce/byte_view.rs
+++ b/arrow-select/src/coalesce/byte_view.rs
@@ -98,7 +98,10 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
     /// This is done on write (when we know it is necessary) rather than
     /// eagerly to avoid allocations that are not used.
     fn ensure_capacity(&mut self) {
-        self.views.reserve(self.batch_size);
+        if self.views.capacity() == 0 {
+            self.views.reserve(self.batch_size);
+        }
+        debug_assert_eq!(self.views.capacity(), self.batch_size);
     }
 
     /// Finishes in progress buffer, if any
diff --git a/arrow-select/src/coalesce/primitive.rs 
b/arrow-select/src/coalesce/primitive.rs
index 85b653357b..69dad221bd 100644
--- a/arrow-select/src/coalesce/primitive.rs
+++ b/arrow-select/src/coalesce/primitive.rs
@@ -55,7 +55,10 @@ impl<T: ArrowPrimitiveType> InProgressPrimitiveArray<T> {
     /// This is done on write (when we know it is necessary) rather than
     /// eagerly to avoid allocations that are not used.
     fn ensure_capacity(&mut self) {
-        self.current.reserve(self.batch_size);
+        if self.current.capacity() == 0 {
+            self.current.reserve(self.batch_size);
+        }
+        debug_assert_eq!(self.current.capacity(), self.batch_size);
     }
 }
 

Reply via email to