This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new de84ff555c Propagate errors instead of panics: Replace usages of `new` 
with `try_new` for Array types (#8397)
de84ff555c is described below

commit de84ff555c779a1fba2e576b90e62ad6a48e4277
Author: Jeffrey Vo <[email protected]>
AuthorDate: Sun Sep 21 22:46:57 2025 +1000

    Propagate errors instead of panics: Replace usages of `new` with `try_new` 
for Array types (#8397)
    
    # Which issue does this PR close?
    
    Related to #7806
    
    # Rationale for this change
    
    Some methods in `arrow` use `new()` of array types, which internally calls
    `try_new()` and unwraps the result; this can lead to panics instead of
    propagating errors to downstream users. For example, this DataFusion
    issue: https://github.com/apache/datafusion/issues/12598
    
    # What changes are included in this PR?
    
    Replace usages of `new()` with `try_new()` where appropriate (i.e. where
    the functions they are used inside already return a `Result`).
    
    # Are these changes tested?
    
    Covered by existing tests.
    
    # Are there any user-facing changes?
    
    No.
---
 arrow-avro/src/reader/record.rs        | 19 ++++++++++---------
 arrow-cast/src/base64.rs               |  6 +-----
 arrow-cast/src/cast/list.rs            | 14 +++++++-------
 arrow-cast/src/cast/map.rs             |  8 ++++----
 arrow-cast/src/cast/mod.rs             |  4 ++--
 arrow-cast/src/cast/string.rs          |  2 +-
 arrow-ord/src/sort.rs                  |  2 +-
 arrow-row/src/run.rs                   |  4 ++--
 arrow-select/src/concat.rs             |  2 +-
 arrow-select/src/dictionary.rs         |  2 +-
 arrow-select/src/filter.rs             |  7 +++++--
 arrow-select/src/interleave.rs         |  2 +-
 arrow-select/src/take.rs               |  6 +++---
 arrow-select/src/union_extract.rs      |  2 +-
 arrow-string/src/length.rs             | 19 +++++++++++--------
 parquet-variant-compute/src/to_json.rs |  6 +-----
 16 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs
index 80a3c19d5c..3295e330a1 100644
--- a/arrow-avro/src/reader/record.rs
+++ b/arrow-avro/src/reader/record.rs
@@ -53,7 +53,7 @@ macro_rules! decode_decimal {
 macro_rules! flush_decimal {
     ($builder:expr, $precision:expr, $scale:expr, $nulls:expr, $ArrayTy:ty) => 
{{
         let (_, vals, _) = $builder.finish().into_parts();
-        let dec = <$ArrayTy>::new(vals, $nulls)
+        let dec = <$ArrayTy>::try_new(vals, $nulls)?
             .with_precision_and_scale(*$precision as u8, $scale.unwrap_or(0) 
as i8)
             .map_err(|e| ArrowError::ParseError(e.to_string()))?;
         Arc::new(dec) as ArrayRef
@@ -889,17 +889,17 @@ impl Decoder {
             Self::StringToBytes(offsets, values) | Self::Binary(offsets, 
values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values).into();
-                Arc::new(BinaryArray::new(offsets, values, nulls))
+                Arc::new(BinaryArray::try_new(offsets, values, nulls)?)
             }
             Self::BytesToString(offsets, values) | Self::String(offsets, 
values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values).into();
-                Arc::new(StringArray::new(offsets, values, nulls))
+                Arc::new(StringArray::try_new(offsets, values, nulls)?)
             }
             Self::StringView(offsets, values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values);
-                let array = StringArray::new(offsets, values.into(), 
nulls.clone());
+                let array = StringArray::try_new(offsets, values.into(), 
nulls.clone())?;
                 let values: Vec<&str> = (0..array.len())
                     .map(|i| {
                         if array.is_valid(i) {
@@ -914,21 +914,21 @@ impl Decoder {
             Self::Array(field, offsets, values) => {
                 let values = values.flush(None)?;
                 let offsets = flush_offsets(offsets);
-                Arc::new(ListArray::new(field.clone(), offsets, values, nulls))
+                Arc::new(ListArray::try_new(field.clone(), offsets, values, 
nulls)?)
             }
             Self::Record(fields, encodings, _) => {
                 let arrays = encodings
                     .iter_mut()
                     .map(|x| x.flush(None))
                     .collect::<Result<Vec<_>, _>>()?;
-                Arc::new(StructArray::new(fields.clone(), arrays, nulls))
+                Arc::new(StructArray::try_new(fields.clone(), arrays, nulls)?)
             }
             Self::Map(map_field, k_off, m_off, kdata, valdec) => {
                 let moff = flush_offsets(m_off);
                 let koff = flush_offsets(k_off);
                 let kd = flush_values(kdata).into();
                 let val_arr = valdec.flush(None)?;
-                let key_arr = StringArray::new(koff, kd, None);
+                let key_arr = StringArray::try_new(koff, kd, None)?;
                 if key_arr.len() != val_arr.len() {
                     return Err(ArrowError::InvalidArgumentError(format!(
                         "Map keys length ({}) != map values length ({})",
@@ -954,8 +954,9 @@ impl Decoder {
                     }
                 };
                 let entries_struct =
-                    StructArray::new(entries_fields, vec![Arc::new(key_arr), 
val_arr], None);
-                let map_arr = MapArray::new(map_field.clone(), moff, 
entries_struct, nulls, false);
+                    StructArray::try_new(entries_fields, 
vec![Arc::new(key_arr), val_arr], None)?;
+                let map_arr =
+                    MapArray::try_new(map_field.clone(), moff, entries_struct, 
nulls, false)?;
                 Arc::new(map_arr)
             }
             Self::Fixed(sz, accum) => {
diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs
index e7bb84ebe2..27a946b780 100644
--- a/arrow-cast/src/base64.rs
+++ b/arrow-cast/src/base64.rs
@@ -79,11 +79,7 @@ pub fn b64_decode<E: Engine, O: OffsetSizeTrait>(
     // Safety: offsets monotonically increasing by construction
     let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
 
-    Ok(GenericBinaryArray::new(
-        offsets,
-        Buffer::from_vec(buffer),
-        array.nulls().cloned(),
-    ))
+    GenericBinaryArray::try_new(offsets, Buffer::from_vec(buffer), 
array.nulls().cloned())
 }
 
 #[cfg(test)]
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
index 1728cc4061..0bc313da53 100644
--- a/arrow-cast/src/cast/list.rs
+++ b/arrow-cast/src/cast/list.rs
@@ -25,7 +25,7 @@ pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
 ) -> Result<ArrayRef, ArrowError> {
     let values = cast_with_options(array, to.data_type(), cast_options)?;
     let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1, 
values.len()));
-    let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
+    let list = GenericListArray::<O>::try_new(to.clone(), offsets, values, 
None)?;
     Ok(Arc::new(list))
 }
 
@@ -37,7 +37,7 @@ pub(crate) fn cast_values_to_fixed_size_list(
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError> {
     let values = cast_with_options(array, to.data_type(), cast_options)?;
-    let list = FixedSizeListArray::new(to.clone(), size, values, None);
+    let list = FixedSizeListArray::try_new(to.clone(), size, values, None)?;
     Ok(Arc::new(list))
 }
 
@@ -140,7 +140,7 @@ where
 
     // Construct the FixedSizeListArray
     let nulls = nulls.map(|mut x| x.finish().into());
-    let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
+    let array = FixedSizeListArray::try_new(field.clone(), size, values, 
nulls)?;
     Ok(Arc::new(array))
 }
 
@@ -152,12 +152,12 @@ pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
 ) -> Result<ArrayRef, ArrowError> {
     let list = array.as_list::<O>();
     let values = cast_with_options(list.values(), to.data_type(), 
cast_options)?;
-    Ok(Arc::new(GenericListArray::<O>::new(
+    Ok(Arc::new(GenericListArray::<O>::try_new(
         to.clone(),
         list.offsets().clone(),
         values,
         list.nulls().cloned(),
-    )))
+    )?))
 }
 
 /// Cast the container type of List/Largelist array along with the inner 
datatype
@@ -184,10 +184,10 @@ pub(crate) fn cast_list<I: OffsetSizeTrait, O: 
OffsetSizeTrait>(
     // Safety: valid offsets and checked for overflow
     let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
 
-    Ok(Arc::new(GenericListArray::<O>::new(
+    Ok(Arc::new(GenericListArray::<O>::try_new(
         field.clone(),
         offsets,
         values,
         nulls,
-    )))
+    )?))
 }
diff --git a/arrow-cast/src/cast/map.rs b/arrow-cast/src/cast/map.rs
index d62a9519b7..e7a9b7495e 100644
--- a/arrow-cast/src/cast/map.rs
+++ b/arrow-cast/src/cast/map.rs
@@ -42,17 +42,17 @@ pub(crate) fn cast_map_values(
     let key_array = cast_with_options(from.keys(), key_field.data_type(), 
cast_options)?;
     let value_array = cast_with_options(from.values(), 
value_field.data_type(), cast_options)?;
 
-    Ok(Arc::new(MapArray::new(
+    Ok(Arc::new(MapArray::try_new(
         entries_field.clone(),
         from.offsets().clone(),
-        StructArray::new(
+        StructArray::try_new(
             Fields::from(vec![key_field, value_field]),
             vec![key_array, value_array],
             from.entries().nulls().cloned(),
-        ),
+        )?,
         from.nulls().cloned(),
         to_ordered,
-    )))
+    )?))
 }
 
 /// Gets the key field from the entries of a map.  For all other types returns 
None.
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index fc241bea48..72b2de99bd 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -2381,11 +2381,11 @@ fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: 
OffsetSizeTrait>(
     let array = array.as_primitive::<FROM>();
     let size = std::mem::size_of::<FROM::Native>();
     let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, 
array.len()));
-    Ok(Arc::new(GenericBinaryArray::<O>::new(
+    Ok(Arc::new(GenericBinaryArray::<O>::try_new(
         offsets,
         array.values().inner().clone(),
         array.nulls().cloned(),
-    )))
+    )?))
 }
 
 fn adjust_timestamp_to_timezone<T: ArrowTimestampType>(
diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs
index 7f22c4fd64..09a9978ff7 100644
--- a/arrow-cast/src/cast/string.rs
+++ b/arrow-cast/src/cast/string.rs
@@ -115,7 +115,7 @@ fn parse_string_iter<
                 None => Ok(P::Native::default()),
             })
             .collect::<Result<Vec<_>, ArrowError>>()?;
-        PrimitiveArray::new(v.into(), nulls())
+        PrimitiveArray::try_new(v.into(), nulls())?
     };
 
     Ok(Arc::new(array) as ArrayRef)
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 797c224673..21e8d18593 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -120,7 +120,7 @@ where
     }
 
     Ok(Arc::new(
-        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
+        PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
             .with_data_type(primitive_values.data_type().clone()),
     ))
 }
diff --git a/arrow-row/src/run.rs b/arrow-row/src/run.rs
index ff7c0ffe54..6ed246ce6f 100644
--- a/arrow-row/src/run.rs
+++ b/arrow-row/src/run.rs
@@ -98,7 +98,7 @@ pub unsafe fn decode<R: RunEndIndexType>(
 ) -> Result<RunArray<R>, ArrowError> {
     if rows.is_empty() {
         let values = converter.convert_raw(&mut [], validate_utf8)?;
-        let run_ends_array = 
PrimitiveArray::<R>::new(ScalarBuffer::from(vec![]), None);
+        let run_ends_array = 
PrimitiveArray::<R>::try_new(ScalarBuffer::from(vec![]), None)?;
         return RunArray::<R>::try_new(&run_ends_array, &values[0]);
     }
 
@@ -149,7 +149,7 @@ pub unsafe fn decode<R: RunEndIndexType>(
     };
 
     // Create run ends array
-    let run_ends_array = 
PrimitiveArray::<R>::new(ScalarBuffer::from(run_ends), None);
+    let run_ends_array = 
PrimitiveArray::<R>::try_new(ScalarBuffer::from(run_ends), None)?;
 
     // Create the RunEndEncodedArray
     RunArray::<R>::try_new(&run_ends_array, &values[0])
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index bd93650055..d300644792 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -134,7 +134,7 @@ fn concat_dictionaries<K: ArrowDictionaryKeyType>(
         NullBuffer::new(nulls.finish())
     });
 
-    let keys = PrimitiveArray::<K>::new(key_values.into(), nulls);
+    let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
     // Sanity check
     assert_eq!(keys.len(), output_len);
 
diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs
index ff1198cf70..3b3cad257b 100644
--- a/arrow-select/src/dictionary.rs
+++ b/arrow-select/src/dictionary.rs
@@ -75,7 +75,7 @@ pub fn garbage_collect_dictionary<K: ArrowDictionaryKeyType>(
     // Create a new values array by filtering using the mask
     let values = filter(dictionary.values(), &BooleanArray::new(mask, None))?;
 
-    Ok(DictionaryArray::new(new_keys, values))
+    DictionaryArray::try_new(new_keys, values)
 }
 
 /// Equivalent to [`garbage_collect_dictionary`] but without requiring casting 
to a specific key type.
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 641599cea6..73877bb88c 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -433,7 +433,7 @@ where
     let values = array.values();
     let values = filter(&values, &pred)?;
 
-    let run_ends = PrimitiveArray::<R>::new(new_run_ends.into(), None);
+    let run_ends = PrimitiveArray::<R>::try_new(new_run_ends.into(), None)?;
     RunArray::try_new(&run_ends, &values)
 }
 
@@ -845,7 +845,10 @@ fn filter_sparse_union(
         unreachable!()
     };
 
-    let type_ids = filter_primitive(&Int8Array::new(array.type_ids().clone(), 
None), predicate);
+    let type_ids = filter_primitive(
+        &Int8Array::try_new(array.type_ids().clone(), None)?,
+        predicate,
+    );
 
     let children = fields
         .iter()
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index ba2a032d3a..10f903d5e8 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -157,7 +157,7 @@ fn interleave_primitive<T: ArrowPrimitiveType>(
         .map(|(a, b)| interleaved.arrays[*a].value(*b))
         .collect::<Vec<_>>();
 
-    let array = PrimitiveArray::<T>::new(values.into(), interleaved.nulls);
+    let array = PrimitiveArray::<T>::try_new(values.into(), 
interleaved.nulls)?;
     Ok(Arc::new(array.with_data_type(data_type.clone())))
 }
 
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 7680b82d4c..5bb966c678 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -314,8 +314,8 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::Union(fields, UnionMode::Dense) => {
             let values = values.as_any().downcast_ref::<UnionArray>().unwrap();
 
-            let type_ids = 
<PrimitiveArray<Int8Type>>::new(take_native(values.type_ids(), indices), None);
-            let offsets = 
<PrimitiveArray<Int32Type>>::new(take_native(values.offsets().unwrap(), 
indices), None);
+            let type_ids = 
<PrimitiveArray<Int8Type>>::try_new(take_native(values.type_ids(), indices), 
None)?;
+            let offsets = 
<PrimitiveArray<Int32Type>>::try_new(take_native(values.offsets().unwrap(), 
indices), None)?;
 
             let children = fields.iter()
                 .map(|(field_type_id, _)| {
@@ -387,7 +387,7 @@ where
 {
     let values_buf = take_native(values.values(), indices);
     let nulls = take_nulls(values.nulls(), indices);
-    Ok(PrimitiveArray::new(values_buf, 
nulls).with_data_type(values.data_type().clone()))
+    Ok(PrimitiveArray::try_new(values_buf, 
nulls)?.with_data_type(values.data_type().clone()))
 }
 
 #[inline(never)]
diff --git a/arrow-select/src/union_extract.rs 
b/arrow-select/src/union_extract.rs
index 62d660b804..b07ea32f4d 100644
--- a/arrow-select/src/union_extract.rs
+++ b/arrow-select/src/union_extract.rs
@@ -257,7 +257,7 @@ fn extract_dense(
                 //case 6: some type_ids matches our target, but not all. For 
selected values, take the value pointed by the offset. For unselected, use a 
valid null
                 Ok(take(
                     target,
-                    &Int32Array::new(offsets.clone(), Some(selected.into())),
+                    &Int32Array::try_new(offsets.clone(), 
Some(selected.into()))?,
                     None,
                 )?)
             }
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 49fc244e72..b0f6eb0632 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -78,10 +78,10 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
         DataType::Utf8View => {
             let list = array.as_string_view();
             let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
-            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
                 v.into(),
                 list.nulls().cloned(),
-            )))
+            )?))
         }
         DataType::Binary => {
             let list = array.as_binary::<i32>();
@@ -92,15 +92,15 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
             Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
         }
         DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => 
Ok(Arc::new(
-            Int32Array::new(vec![*len; array.len()].into(), 
array.nulls().cloned()),
+            Int32Array::try_new(vec![*len; array.len()].into(), 
array.nulls().cloned())?,
         )),
         DataType::BinaryView => {
             let list = array.as_binary_view();
             let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
-            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
                 v.into(),
                 list.nulls().cloned(),
-            )))
+            )?))
         }
         other => Err(ArrowError::ComputeError(format!(
             "length not supported for {other:?}"
@@ -144,7 +144,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
                 .iter()
                 .map(|view| (*view as i32).wrapping_mul(8))
                 .collect();
-            Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
+            Ok(Arc::new(Int32Array::try_new(
+                values,
+                array.nulls().cloned(),
+            )?))
         }
         DataType::Binary => {
             let list = array.as_binary::<i32>();
@@ -154,10 +157,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
             let list = array.as_binary::<i64>();
             Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
         }
-        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new(
+        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
             vec![*len * 8; array.len()].into(),
             array.nulls().cloned(),
-        ))),
+        )?)),
         other => Err(ArrowError::ComputeError(format!(
             "bit_length not supported for {other:?}"
         ))),
diff --git a/parquet-variant-compute/src/to_json.rs 
b/parquet-variant-compute/src/to_json.rs
index 1d6f51ca24..fdb32d883a 100644
--- a/parquet-variant-compute/src/to_json.rs
+++ b/parquet-variant-compute/src/to_json.rs
@@ -95,11 +95,7 @@ pub fn variant_to_json(input: &ArrayRef) -> 
Result<StringArray, ArrowError> {
     let value_buffer = Buffer::from_vec(json_buffer);
     let null_buffer = NullBuffer::new(validity.finish());
 
-    Ok(StringArray::new(
-        offsets_buffer,
-        value_buffer,
-        Some(null_buffer),
-    ))
+    StringArray::try_new(offsets_buffer, value_buffer, Some(null_buffer))
 }
 
 #[cfg(test)]

Reply via email to