This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d610468d22 Avoid a clone when creating StringArray/BinaryArray from 
ArrayData (#9160)
d610468d22 is described below

commit d610468d22406772e1aa01600aeb23b7a0444120
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 14 07:31:06 2026 -0500

    Avoid a clone when creating StringArray/BinaryArray from ArrayData (#9160)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/9061
    - broken out of https://github.com/apache/arrow-rs/pull/9058
    
    # Rationale for this change
    
    Let's make arrow-rs as fast as we can; the fewer allocations (and
    clones), the better.
    
    # What changes are included in this PR?
    
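    Apply the pattern from https://github.com/apache/arrow-rs/pull/9114:
    take ownership of the `ArrayData` fields via `into_parts()` and move
    the buffers and null buffer into the array instead of cloning them.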
    
    # Are these changes tested?
    
    Yes, they are covered by existing tests.
    
    # Are there any user-facing changes?
    
    No
---
 arrow-array/src/array/byte_array.rs | 20 ++++++++++++--------
 arrow-array/src/array/mod.rs        | 23 ++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/arrow-array/src/array/byte_array.rs 
b/arrow-array/src/array/byte_array.rs
index bd85bffcfe..8e8ad91cea 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::array::{get_offsets, print_long_array};
+use crate::array::{get_offsets_from_buffer, print_long_array};
 use crate::builder::GenericByteBuilder;
 use crate::iterator::ArrayIter;
 use crate::types::ByteArrayType;
@@ -542,30 +542,34 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a 
GenericByteArray<T> {
 
 impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
     fn from(data: ArrayData) -> Self {
+        let (data_type, len, nulls, offset, mut buffers, _child_data) = 
data.into_parts();
         assert_eq!(
-            data.data_type(),
-            &Self::DATA_TYPE,
+            data_type,
+            Self::DATA_TYPE,
             "{}{}Array expects DataType::{}",
             T::Offset::PREFIX,
             T::PREFIX,
             Self::DATA_TYPE
         );
         assert_eq!(
-            data.buffers().len(),
+            buffers.len(),
             2,
             "{}{}Array data should contain 2 buffers only (offsets and 
values)",
             T::Offset::PREFIX,
             T::PREFIX,
         );
+        // buffers are offset then value, so pop in reverse
+        let value_data = buffers.pop().expect("checked above");
+        let offset_buffer = buffers.pop().expect("checked above");
+
         // SAFETY:
         // ArrayData is valid, and verified type above
-        let value_offsets = unsafe { get_offsets(&data) };
-        let value_data = data.buffers()[1].clone();
+        let value_offsets = unsafe { get_offsets_from_buffer(offset_buffer, 
offset, len) };
         Self {
             value_offsets,
             value_data,
-            data_type: T::DATA_TYPE,
-            nulls: data.nulls().cloned(),
+            data_type,
+            nulls,
         }
     }
 }
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index aae382ace7..6fcb80c533 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -20,7 +20,7 @@
 mod binary_array;
 
 use crate::types::*;
-use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, 
ScalarBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{DataType, IntervalUnit, TimeUnit};
 use std::any::Any;
@@ -939,6 +939,27 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: 
&ArrayData) -> OffsetBuffer<O> {
     }
 }
 
+/// Helper function that creates an [`OffsetBuffer`] from a buffer and array 
offset/ length
+///
+/// # Safety
+///
+/// - buffer must contain valid arrow offsets ( [`OffsetBuffer`] ) for the
+///   given length and offset.
+unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
+    buffer: Buffer,
+    offset: usize,
+    len: usize,
+) -> OffsetBuffer<O> {
+    if len == 0 && buffer.is_empty() {
+        return OffsetBuffer::new_empty();
+    }
+
+    let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
+    // Safety:
+    // Arguments were valid
+    unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
+}
+
 /// Helper function for printing potentially long arrays.
 fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: 
F) -> std::fmt::Result
 where

Reply via email to