This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 860b2db748 parquet: reduce clone in delta byte array decoder (#9282)
860b2db748 is described below

commit 860b2db748f11fc93960793ede9315f2962d4dfc
Author: Lanqing Yang <[email protected]>
AuthorDate: Sat Jan 31 04:41:38 2026 -0800

    parquet: reduce clone in delta byte array decoder (#9282)
    
    # Which issue does this PR close?
    
    small optimization
    
    # Rationale for this change
    The key insight is that cloning a `Bytes` is cheap (just a reference-count
    increment), whereas cloning a `Vec` requires an allocation plus a memcpy.
    
    before
    ```
    let mut result = Vec::new();          // alloc #1
    result.extend_from_slice(prefix);
    result.extend_from_slice(suffix);
    
    let data = Bytes::from(result.clone()); // alloc #2 + memcpy
    item.set_from_bytes(data);
    self.previous_value = result;          // keep Vec
    ```
    
    after
    ```
    let mut result = Vec::with_capacity(prefix_len + suffix.len()); // alloc #1
    result.extend_from_slice(&self.previous_value[..prefix_len]);
    result.extend_from_slice(suffix);
    
    let data = Bytes::from(result);       // no alloc, takes Vec buffer
    item.set_from_bytes(data.clone());    // cheap refcount bump
    self.previous_value = data;           // move, no alloc
    ```
    
    # What changes are included in this PR?
    - The type of `previous_value` is changed from `Vec<u8>` to `Bytes`.
    - The `result` Vec is preallocated with capacity `prefix_len + suffix.len()`.
    
    # Are these changes tested?
    
    No new tests are added; the existing tests should continue to pass.
    
    # Are there any user-facing changes?
    
    no
---
 parquet/src/encodings/decoding.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/parquet/src/encodings/decoding.rs 
b/parquet/src/encodings/decoding.rs
index 1f81c67dab..58430820a9 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -1008,7 +1008,8 @@ pub struct DeltaByteArrayDecoder<T: DataType> {
     suffix_decoder: Option<DeltaLengthByteArrayDecoder<ByteArrayType>>,
 
     // The last byte array, used to derive the current prefix
-    previous_value: Vec<u8>,
+    // Stored as Bytes to avoid clone allocation when creating output
+    previous_value: Bytes,
 
     // Number of values left
     num_values: usize,
@@ -1030,7 +1031,7 @@ impl<T: DataType> DeltaByteArrayDecoder<T> {
             prefix_lengths: vec![],
             current_idx: 0,
             suffix_decoder: None,
-            previous_value: vec![],
+            previous_value: Bytes::new(),
             num_values: 0,
             _phantom: PhantomData,
         }
@@ -1053,7 +1054,7 @@ impl<T: DataType> Decoder<T> for DeltaByteArrayDecoder<T> 
{
                 self.suffix_decoder = Some(suffix_decoder);
                 self.num_values = num_prefixes;
                 self.current_idx = 0;
-                self.previous_value.clear();
+                self.previous_value = Bytes::new();
                 Ok(())
             }
             _ => Err(general_err!(
@@ -1081,14 +1082,14 @@ impl<T: DataType> Decoder<T> for 
DeltaByteArrayDecoder<T> {
                     let prefix_len = self.prefix_lengths[self.current_idx] as 
usize;
 
                     // Concatenate prefix with suffix
-                    let mut result = Vec::new();
+                    let mut result = Vec::with_capacity(prefix_len + 
suffix.len());
                     
result.extend_from_slice(&self.previous_value[0..prefix_len]);
                     result.extend_from_slice(suffix);
 
-                    let data = Bytes::from(result.clone());
-                    item.set_from_bytes(data);
+                    let data = Bytes::from(result);
+                    item.set_from_bytes(data.clone());
 
-                    self.previous_value = result;
+                    self.previous_value = data;
                     self.current_idx += 1;
                 }
 

Reply via email to