This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b16fb3a7e fix: don't generate nulls for `Decimal128` and `Decimal256` 
when field is non-nullable and have non-zero `null_density` (#9046)
9b16fb3a7e is described below

commit 9b16fb3a7e1001e70e2e4195857eda6d74589ac4
Author: Raz Luvaton <[email protected]>
AuthorDate: Tue Dec 30 14:33:51 2025 +0200

    fix: don't generate nulls for `Decimal128` and `Decimal256` when field is 
non-nullable and have non-zero `null_density` (#9046)
    
    # Which issue does this PR close?
    
    N/A
    
    # Rationale for this change
    
    if decimal field is non nullable and have null density we should not
    have nulls in the genrated array
    
    # What changes are included in this PR?
    
    Override `null_density` for non nested and non dictionary to avoid
    future problems like this
    added assertion and test
    
    # Are these changes tested?
    
    yes
    
    # Are there any user-facing changes?
    
    working generate for `Decimal128` and `Decimal256`
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow/src/util/data_gen.rs | 160 ++++++++++++++++++++-------------------------
 1 file changed, 71 insertions(+), 89 deletions(-)

diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs
index 89bbe4b1fb..023436e0a7 100644
--- a/arrow/src/util/data_gen.rs
+++ b/arrow/src/util/data_gen.rs
@@ -66,110 +66,72 @@ pub fn create_random_batch(
 pub fn create_random_array(
     field: &Field,
     size: usize,
-    null_density: f32,
+    mut null_density: f32,
     true_density: f32,
 ) -> Result<ArrayRef> {
-    // Override null density with 0.0 if the array is non-nullable
-    // and a primitive type in case a nested field is nullable
-    let primitive_null_density = match field.is_nullable() {
-        true => null_density,
-        false => 0.0,
-    };
+    // Override nullability in case of not nested and not dictionary
+    // For nested we don't want to override as we want to keep the nullability 
for the children
+    // For dictionary it handle the nullability internally
+    if !field.data_type().is_nested() && !matches!(field.data_type(), 
Dictionary(_, _)) {
+        // Override null density with 0.0 if the array is non-nullable
+        null_density = match field.is_nullable() {
+            true => null_density,
+            false => 0.0,
+        };
+    }
+
     use DataType::*;
-    Ok(match field.data_type() {
+    let array = match field.data_type() {
         Null => Arc::new(NullArray::new(size)) as ArrayRef,
-        Boolean => Arc::new(create_boolean_array(
-            size,
-            primitive_null_density,
-            true_density,
-        )),
-        Int8 => Arc::new(create_primitive_array::<Int8Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int16 => Arc::new(create_primitive_array::<Int16Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int32 => Arc::new(create_primitive_array::<Int32Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int64 => Arc::new(create_primitive_array::<Int64Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt8 => Arc::new(create_primitive_array::<UInt8Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt16 => Arc::new(create_primitive_array::<UInt16Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt32 => Arc::new(create_primitive_array::<UInt32Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt64 => Arc::new(create_primitive_array::<UInt64Type>(
-            size,
-            primitive_null_density,
-        )),
+        Boolean => Arc::new(create_boolean_array(size, null_density, 
true_density)),
+        Int8 => Arc::new(create_primitive_array::<Int8Type>(size, 
null_density)),
+        Int16 => Arc::new(create_primitive_array::<Int16Type>(size, 
null_density)),
+        Int32 => Arc::new(create_primitive_array::<Int32Type>(size, 
null_density)),
+        Int64 => Arc::new(create_primitive_array::<Int64Type>(size, 
null_density)),
+        UInt8 => Arc::new(create_primitive_array::<UInt8Type>(size, 
null_density)),
+        UInt16 => Arc::new(create_primitive_array::<UInt16Type>(size, 
null_density)),
+        UInt32 => Arc::new(create_primitive_array::<UInt32Type>(size, 
null_density)),
+        UInt64 => Arc::new(create_primitive_array::<UInt64Type>(size, 
null_density)),
         Float16 => {
             return Err(ArrowError::NotYetImplemented(
                 "Float16 is not implemented".to_string(),
             ));
         }
-        Float32 => Arc::new(create_primitive_array::<Float32Type>(
-            size,
-            primitive_null_density,
-        )),
-        Float64 => Arc::new(create_primitive_array::<Float64Type>(
-            size,
-            primitive_null_density,
-        )),
+        Float32 => Arc::new(create_primitive_array::<Float32Type>(size, 
null_density)),
+        Float64 => Arc::new(create_primitive_array::<Float64Type>(size, 
null_density)),
         Timestamp(unit, tz) => match unit {
             TimeUnit::Second => Arc::new(
-                create_random_temporal_array::<TimestampSecondType>(size, 
primitive_null_density)
+                create_random_temporal_array::<TimestampSecondType>(size, 
null_density)
                     .with_timezone_opt(tz.clone()),
-            ),
+            ) as ArrayRef,
             TimeUnit::Millisecond => Arc::new(
-                create_random_temporal_array::<TimestampMillisecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampMillisecondType>(size, 
null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
             TimeUnit::Microsecond => Arc::new(
-                create_random_temporal_array::<TimestampMicrosecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampMicrosecondType>(size, 
null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
             TimeUnit::Nanosecond => Arc::new(
-                create_random_temporal_array::<TimestampNanosecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampNanosecondType>(size, 
null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
         },
         Date32 => Arc::new(create_random_temporal_array::<Date32Type>(
             size,
-            primitive_null_density,
+            null_density,
         )),
         Date64 => Arc::new(create_random_temporal_array::<Date64Type>(
             size,
-            primitive_null_density,
+            null_density,
         )),
         Time32(unit) => match unit {
             TimeUnit::Second => 
Arc::new(create_random_temporal_array::<Time32SecondType>(
                 size,
-                primitive_null_density,
+                null_density,
             )) as ArrayRef,
             TimeUnit::Millisecond => Arc::new(
-                create_random_temporal_array::<Time32MillisecondType>(size, 
primitive_null_density),
+                create_random_temporal_array::<Time32MillisecondType>(size, 
null_density),
             ),
             _ => {
                 return Err(ArrowError::InvalidArgumentError(format!(
@@ -179,11 +141,11 @@ pub fn create_random_array(
         },
         Time64(unit) => match unit {
             TimeUnit::Microsecond => Arc::new(
-                create_random_temporal_array::<Time64MicrosecondType>(size, 
primitive_null_density),
+                create_random_temporal_array::<Time64MicrosecondType>(size, 
null_density),
             ) as ArrayRef,
             TimeUnit::Nanosecond => 
Arc::new(create_random_temporal_array::<Time64NanosecondType>(
                 size,
-                primitive_null_density,
+                null_density,
             )),
             _ => {
                 return Err(ArrowError::InvalidArgumentError(format!(
@@ -191,24 +153,19 @@ pub fn create_random_array(
                 )));
             }
         },
-        Utf8 => Arc::new(create_string_array::<i32>(size, 
primitive_null_density)),
-        LargeUtf8 => Arc::new(create_string_array::<i64>(size, 
primitive_null_density)),
+        Utf8 => Arc::new(create_string_array::<i32>(size, null_density)),
+        LargeUtf8 => Arc::new(create_string_array::<i64>(size, null_density)),
         Utf8View => Arc::new(create_string_view_array_with_len(
             size,
-            primitive_null_density,
+            null_density,
             4,
             false,
         )),
-        Binary => Arc::new(create_binary_array::<i32>(size, 
primitive_null_density)),
-        LargeBinary => Arc::new(create_binary_array::<i64>(size, 
primitive_null_density)),
-        FixedSizeBinary(len) => Arc::new(create_fsb_array(
-            size,
-            primitive_null_density,
-            *len as usize,
-        )),
+        Binary => Arc::new(create_binary_array::<i32>(size, null_density)),
+        LargeBinary => Arc::new(create_binary_array::<i64>(size, 
null_density)),
+        FixedSizeBinary(len) => Arc::new(create_fsb_array(size, null_density, 
*len as usize)),
         BinaryView => Arc::new(
-            create_string_view_array_with_len(size, primitive_null_density, 4, 
false)
-                .to_binary_view(),
+            create_string_view_array_with_len(size, null_density, 4, 
false).to_binary_view(),
         ),
         List(_) => create_random_list_array(field, size, null_density, 
true_density)?,
         LargeList(_) => create_random_list_array(field, size, null_density, 
true_density)?,
@@ -230,7 +187,13 @@ pub fn create_random_array(
                 "Generating random arrays not yet implemented for {other:?}"
             )));
         }
-    })
+    };
+
+    if !field.is_nullable() {
+        assert_eq!(array.null_count(), 0);
+    }
+
+    Ok(array)
 }
 
 #[inline]
@@ -812,4 +775,23 @@ mod tests {
             assert_eq!(array.len(), size);
         }
     }
+
+    #[test]
+    fn create_non_nullable_decimal_array_with_null_density() {
+        let size = 10;
+        let fields = vec![
+            Field::new("a", DataType::Decimal128(10, -2), false),
+            Field::new("b", DataType::Decimal256(10, -2), false),
+        ];
+        let schema = Schema::new(fields);
+        let schema_ref = Arc::new(schema);
+        let batch = create_random_batch(schema_ref.clone(), size, 0.35, 
0.7).unwrap();
+
+        assert_eq!(batch.schema(), schema_ref);
+        assert_eq!(batch.num_columns(), schema_ref.fields().len());
+        for array in batch.columns() {
+            assert_eq!(array.len(), size);
+            assert_eq!(array.null_count(), 0);
+        }
+    }
 }

Reply via email to