This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 721150286b feat: support append_nulls on additional builders (#7606)
721150286b is described below

commit 721150286b00bece40ffcc6f5ac14ebb5d64785b
Author: albertlockett <[email protected]>
AuthorDate: Tue Jun 10 12:29:23 2025 -0400

    feat: support append_nulls on additional builders (#7606)
    
    # Which issue does this PR close?
    
    
    - Closes https://github.com/apache/arrow-rs/issues/7605
    
    # Rationale for this change
    
    I thought it would be nice if `append_nulls` was supported for
    additional types of array builders. Currently it is available on some
    builder types, but not all.
    
    # What changes are included in this PR?
    
    Add an `append_nulls` method to:
    - FixedSizeBinaryDictionaryBuilder
    - FixedSizeBinaryBuilder
    - GenericByteBuilder
    - GenericListBuilder
    - StructBuilder
    
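    # Are there any user-facing changes?
    
    Yes: each builder listed above gains a new public `append_nulls(n)`
    method. No existing APIs are changed.
    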
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 .../src/builder/fixed_size_binary_builder.rs       | 19 +++++++++--
 .../fixed_size_binary_dictionary_builder.rs        | 19 ++++++++++-
 arrow-array/src/builder/generic_bytes_builder.rs   | 37 ++++++++++++++++++----
 arrow-array/src/builder/generic_list_builder.rs    | 20 ++++++++++--
 arrow-array/src/builder/struct_builder.rs          | 36 +++++++++++++++------
 5 files changed, 107 insertions(+), 24 deletions(-)

diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs
index b7fc461559..b5f268917c 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -93,6 +93,14 @@ impl FixedSizeBinaryBuilder {
         self.null_buffer_builder.append_null();
     }
 
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        self.values_builder
+            .append_slice(&vec![0u8; self.value_length as usize * n][..]);
+        self.null_buffer_builder.append_n_nulls(n);
+    }
+
     /// Returns the current values buffer as a slice
     pub fn values_slice(&self) -> &[u8] {
         self.values_builder.as_slice()
@@ -169,17 +177,22 @@ mod tests {
     fn test_fixed_size_binary_builder() {
         let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
 
-        //  [b"hello", null, "arrow"]
+        //  [b"hello", null, "arrow", null, null, "world"]
         builder.append_value(b"hello").unwrap();
         builder.append_null();
         builder.append_value(b"arrow").unwrap();
+        builder.append_nulls(2);
+        builder.append_value(b"world").unwrap();
         let array: FixedSizeBinaryArray = builder.finish();
 
         assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
-        assert_eq!(3, array.len());
-        assert_eq!(1, array.null_count());
+        assert_eq!(6, array.len());
+        assert_eq!(3, array.null_count());
         assert_eq!(10, array.value_offset(2));
+        assert_eq!(15, array.value_offset(3));
         assert_eq!(5, array.value_length());
+        assert!(array.is_null(3));
+        assert!(array.is_null(4));
     }
 
     #[test]
diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
index 007f3de0a2..f3460353b1 100644
--- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
@@ -192,6 +192,12 @@ where
         self.keys_builder.append_null()
     }
 
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        self.keys_builder.append_nulls(n);
+    }
+
     /// Infallibly append a value to this builder
     ///
     /// # Panics
@@ -265,11 +271,22 @@ mod tests {
         assert_eq!(b.append(values[1]).unwrap(), 1);
         assert_eq!(b.append(values[1]).unwrap(), 1);
         assert_eq!(b.append(values[0]).unwrap(), 0);
+        b.append_nulls(2);
+        assert_eq!(b.append(values[0]).unwrap(), 0);
         let array = b.finish();
 
         assert_eq!(
             array.keys(),
-            &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]),
+            &Int8Array::from(vec![
+                Some(0),
+                None,
+                Some(1),
+                Some(1),
+                Some(0),
+                None,
+                None,
+                Some(0)
+            ]),
         );
 
         // Values are polymorphic and so require a downcast.
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs
index ae82921b0b..91ac2a483e 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -129,6 +129,14 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         self.offsets_builder.append(self.next_offset());
     }
 
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        self.null_buffer_builder.append_n_nulls(n);
+        let next_offset = self.next_offset();
+        self.offsets_builder.append_n(n, next_offset);
+    }
+
     /// Appends array values and null to this builder as is
     /// (this means that underlying null values are copied as is).
     #[inline]
@@ -439,15 +447,18 @@ mod tests {
         builder.append_null();
         builder.append_null();
         builder.append_null();
-        assert_eq!(3, builder.len());
+        builder.append_nulls(2);
+        assert_eq!(5, builder.len());
         assert!(!builder.is_empty());
 
         let array = builder.finish();
-        assert_eq!(3, array.null_count());
-        assert_eq!(3, array.len());
+        assert_eq!(5, array.null_count());
+        assert_eq!(5, array.len());
         assert!(array.is_null(0));
         assert!(array.is_null(1));
         assert!(array.is_null(2));
+        assert!(array.is_null(3));
+        assert!(array.is_null(4));
     }
 
     #[test]
@@ -475,16 +486,23 @@ mod tests {
         builder.append_null();
         builder.append_value(b"arrow");
         builder.append_value(b"");
+        builder.append_nulls(2);
+        builder.append_value(b"hi");
         let array = builder.finish();
 
-        assert_eq!(4, array.len());
-        assert_eq!(1, array.null_count());
+        assert_eq!(7, array.len());
+        assert_eq!(3, array.null_count());
         assert_eq!(b"parquet", array.value(0));
         assert!(array.is_null(1));
+        assert!(array.is_null(4));
+        assert!(array.is_null(5));
         assert_eq!(b"arrow", array.value(2));
         assert_eq!(b"", array.value(1));
+        assert_eq!(b"hi", array.value(6));
+
         assert_eq!(O::zero(), array.value_offsets()[0]);
         assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
+        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
         assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
     }
 
@@ -509,7 +527,9 @@ mod tests {
         builder.append_option(Some("rust"));
         builder.append_option(None::<&str>);
         builder.append_option(None::<String>);
-        assert_eq!(7, builder.len());
+        builder.append_nulls(2);
+        builder.append_value("parquet");
+        assert_eq!(10, builder.len());
 
         assert_eq!(
             GenericStringArray::<O>::from(vec![
@@ -519,7 +539,10 @@ mod tests {
                 None,
                 Some("rust"),
                 None,
-                None
+                None,
+                None,
+                None,
+                Some("parquet")
             ]),
             builder.finish()
         );
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index a9c88ec6c5..463b498c55 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -270,6 +270,14 @@ where
         self.null_buffer_builder.append_null();
     }
 
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        let next_offset = self.next_offset();
+        self.offsets_builder.append_n(n, next_offset);
+        self.null_buffer_builder.append_n_nulls(n);
+    }
+
     /// Appends an optional value into this [`GenericListBuilder`]
     ///
     /// If `Some` calls [`Self::append_value`] otherwise calls [`Self::append_null`]
@@ -406,7 +414,7 @@ mod tests {
         let values_builder = Int32Builder::with_capacity(10);
         let mut builder = GenericListBuilder::<O, _>::new(values_builder);
 
-        //  [[0, 1, 2], null, [3, null, 5], [6, 7]]
+        //  [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]
         builder.values().append_value(0);
         builder.values().append_value(1);
         builder.values().append_value(2);
@@ -419,14 +427,20 @@ mod tests {
         builder.values().append_value(6);
         builder.values().append_value(7);
         builder.append(true);
+        builder.append_nulls(2);
+        builder.values().append_value(8);
+        builder.append(true);
 
         let list_array = builder.finish();
 
         assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(4, list_array.len());
-        assert_eq!(1, list_array.null_count());
+        assert_eq!(7, list_array.len());
+        assert_eq!(3, list_array.null_count());
         assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
+        assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]);
         assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
+        assert!(list_array.is_null(4));
+        assert!(list_array.is_null(5));
     }
 
     #[test]
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 245e9df41e..3afee5863f 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -214,6 +214,12 @@ impl StructBuilder {
         self.append(false)
     }
 
+    /// Appends `n` `null`s into the builder.
+    #[inline]
+    pub fn append_nulls(&mut self, n: usize) {
+        self.null_buffer_builder.append_slice(&vec![false; n]);
+    }
+
     /// Builds the `StructArray` and reset this builder.
     pub fn finish(&mut self) -> StructArray {
         self.validate_content();
@@ -313,6 +319,8 @@ mod tests {
         string_builder.append_null();
         string_builder.append_null();
         string_builder.append_value("mark");
+        string_builder.append_nulls(2);
+        string_builder.append_value("terry");
 
         let int_builder = builder
             .field_builder::<Int32Builder>(1)
@@ -321,35 +329,43 @@ mod tests {
         int_builder.append_value(2);
         int_builder.append_null();
         int_builder.append_value(4);
+        int_builder.append_nulls(2);
+        int_builder.append_value(3);
 
         builder.append(true);
         builder.append(true);
         builder.append_null();
         builder.append(true);
 
+        builder.append_nulls(2);
+        builder.append(true);
+
         let struct_data = builder.finish().into_data();
 
-        assert_eq!(4, struct_data.len());
-        assert_eq!(1, struct_data.null_count());
-        assert_eq!(&[11_u8], struct_data.nulls().unwrap().validity());
+        assert_eq!(7, struct_data.len());
+        assert_eq!(3, struct_data.null_count());
+        assert_eq!(&[75_u8], struct_data.nulls().unwrap().validity());
 
         let expected_string_data = ArrayData::builder(DataType::Utf8)
-            .len(4)
-            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
-            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7]))
-            .add_buffer(Buffer::from_slice_ref(b"joemark"))
+            .len(7)
+            .null_bit_buffer(Some(Buffer::from(&[73_u8])))
+            .add_buffer(Buffer::from_slice_ref([0, 3, 3, 3, 7, 7, 7, 12]))
+            .add_buffer(Buffer::from_slice_ref(b"joemarkterry"))
             .build()
             .unwrap();
 
         let expected_int_data = ArrayData::builder(DataType::Int32)
-            .len(4)
-            .null_bit_buffer(Some(Buffer::from_slice_ref([11_u8])))
-            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4]))
+            .len(7)
+            .null_bit_buffer(Some(Buffer::from_slice_ref([75_u8])))
+            .add_buffer(Buffer::from_slice_ref([1, 2, 0, 4, 4, 4, 3]))
             .build()
             .unwrap();
 
         assert_eq!(expected_string_data, struct_data.child_data()[0]);
         assert_eq!(expected_int_data, struct_data.child_data()[1]);
+
+        assert!(struct_data.is_null(4));
+        assert!(struct_data.is_null(5));
     }
 
     #[test]

Reply via email to