This is an automated email from the ASF dual-hosted git repository.
jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new de84ff555c Propagate errors instead of panics: Replace usages of `new`
with `try_new` for Array types (#8397)
de84ff555c is described below
commit de84ff555c779a1fba2e576b90e62ad6a48e4277
Author: Jeffrey Vo <[email protected]>
AuthorDate: Sun Sep 21 22:46:57 2025 +1000
Propagate errors instead of panics: Replace usages of `new` with `try_new`
for Array types (#8397)
# Which issue does this PR close?
Related to #7806
# Rationale for this change
Some methods in `arrow` call `new()` on array types; `new()` internally calls
`try_new()` and unwraps the result, which can lead to panics instead of
propagating errors to downstream users. For example, this DataFusion
issue: https://github.com/apache/datafusion/issues/12598
# What changes are included in this PR?
Replace usages of `new()` with `try_new()` where appropriate (i.e. where the
functions they are used inside already return a `Result`).
# Are these changes tested?
Covered by existing tests.
# Are there any user-facing changes?
No.
---
arrow-avro/src/reader/record.rs | 19 ++++++++++---------
arrow-cast/src/base64.rs | 6 +-----
arrow-cast/src/cast/list.rs | 14 +++++++-------
arrow-cast/src/cast/map.rs | 8 ++++----
arrow-cast/src/cast/mod.rs | 4 ++--
arrow-cast/src/cast/string.rs | 2 +-
arrow-ord/src/sort.rs | 2 +-
arrow-row/src/run.rs | 4 ++--
arrow-select/src/concat.rs | 2 +-
arrow-select/src/dictionary.rs | 2 +-
arrow-select/src/filter.rs | 7 +++++--
arrow-select/src/interleave.rs | 2 +-
arrow-select/src/take.rs | 6 +++---
arrow-select/src/union_extract.rs | 2 +-
arrow-string/src/length.rs | 19 +++++++++++--------
parquet-variant-compute/src/to_json.rs | 6 +-----
16 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs
index 80a3c19d5c..3295e330a1 100644
--- a/arrow-avro/src/reader/record.rs
+++ b/arrow-avro/src/reader/record.rs
@@ -53,7 +53,7 @@ macro_rules! decode_decimal {
macro_rules! flush_decimal {
($builder:expr, $precision:expr, $scale:expr, $nulls:expr, $ArrayTy:ty) =>
{{
let (_, vals, _) = $builder.finish().into_parts();
- let dec = <$ArrayTy>::new(vals, $nulls)
+ let dec = <$ArrayTy>::try_new(vals, $nulls)?
.with_precision_and_scale(*$precision as u8, $scale.unwrap_or(0)
as i8)
.map_err(|e| ArrowError::ParseError(e.to_string()))?;
Arc::new(dec) as ArrayRef
@@ -889,17 +889,17 @@ impl Decoder {
Self::StringToBytes(offsets, values) | Self::Binary(offsets,
values) => {
let offsets = flush_offsets(offsets);
let values = flush_values(values).into();
- Arc::new(BinaryArray::new(offsets, values, nulls))
+ Arc::new(BinaryArray::try_new(offsets, values, nulls)?)
}
Self::BytesToString(offsets, values) | Self::String(offsets,
values) => {
let offsets = flush_offsets(offsets);
let values = flush_values(values).into();
- Arc::new(StringArray::new(offsets, values, nulls))
+ Arc::new(StringArray::try_new(offsets, values, nulls)?)
}
Self::StringView(offsets, values) => {
let offsets = flush_offsets(offsets);
let values = flush_values(values);
- let array = StringArray::new(offsets, values.into(),
nulls.clone());
+ let array = StringArray::try_new(offsets, values.into(),
nulls.clone())?;
let values: Vec<&str> = (0..array.len())
.map(|i| {
if array.is_valid(i) {
@@ -914,21 +914,21 @@ impl Decoder {
Self::Array(field, offsets, values) => {
let values = values.flush(None)?;
let offsets = flush_offsets(offsets);
- Arc::new(ListArray::new(field.clone(), offsets, values, nulls))
+ Arc::new(ListArray::try_new(field.clone(), offsets, values,
nulls)?)
}
Self::Record(fields, encodings, _) => {
let arrays = encodings
.iter_mut()
.map(|x| x.flush(None))
.collect::<Result<Vec<_>, _>>()?;
- Arc::new(StructArray::new(fields.clone(), arrays, nulls))
+ Arc::new(StructArray::try_new(fields.clone(), arrays, nulls)?)
}
Self::Map(map_field, k_off, m_off, kdata, valdec) => {
let moff = flush_offsets(m_off);
let koff = flush_offsets(k_off);
let kd = flush_values(kdata).into();
let val_arr = valdec.flush(None)?;
- let key_arr = StringArray::new(koff, kd, None);
+ let key_arr = StringArray::try_new(koff, kd, None)?;
if key_arr.len() != val_arr.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Map keys length ({}) != map values length ({})",
@@ -954,8 +954,9 @@ impl Decoder {
}
};
let entries_struct =
- StructArray::new(entries_fields, vec![Arc::new(key_arr),
val_arr], None);
- let map_arr = MapArray::new(map_field.clone(), moff,
entries_struct, nulls, false);
+ StructArray::try_new(entries_fields,
vec![Arc::new(key_arr), val_arr], None)?;
+ let map_arr =
+ MapArray::try_new(map_field.clone(), moff, entries_struct,
nulls, false)?;
Arc::new(map_arr)
}
Self::Fixed(sz, accum) => {
diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs
index e7bb84ebe2..27a946b780 100644
--- a/arrow-cast/src/base64.rs
+++ b/arrow-cast/src/base64.rs
@@ -79,11 +79,7 @@ pub fn b64_decode<E: Engine, O: OffsetSizeTrait>(
// Safety: offsets monotonically increasing by construction
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
- Ok(GenericBinaryArray::new(
- offsets,
- Buffer::from_vec(buffer),
- array.nulls().cloned(),
- ))
+ GenericBinaryArray::try_new(offsets, Buffer::from_vec(buffer),
array.nulls().cloned())
}
#[cfg(test)]
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
index 1728cc4061..0bc313da53 100644
--- a/arrow-cast/src/cast/list.rs
+++ b/arrow-cast/src/cast/list.rs
@@ -25,7 +25,7 @@ pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
) -> Result<ArrayRef, ArrowError> {
let values = cast_with_options(array, to.data_type(), cast_options)?;
let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1,
values.len()));
- let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
+ let list = GenericListArray::<O>::try_new(to.clone(), offsets, values,
None)?;
Ok(Arc::new(list))
}
@@ -37,7 +37,7 @@ pub(crate) fn cast_values_to_fixed_size_list(
cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError> {
let values = cast_with_options(array, to.data_type(), cast_options)?;
- let list = FixedSizeListArray::new(to.clone(), size, values, None);
+ let list = FixedSizeListArray::try_new(to.clone(), size, values, None)?;
Ok(Arc::new(list))
}
@@ -140,7 +140,7 @@ where
// Construct the FixedSizeListArray
let nulls = nulls.map(|mut x| x.finish().into());
- let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
+ let array = FixedSizeListArray::try_new(field.clone(), size, values,
nulls)?;
Ok(Arc::new(array))
}
@@ -152,12 +152,12 @@ pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
) -> Result<ArrayRef, ArrowError> {
let list = array.as_list::<O>();
let values = cast_with_options(list.values(), to.data_type(),
cast_options)?;
- Ok(Arc::new(GenericListArray::<O>::new(
+ Ok(Arc::new(GenericListArray::<O>::try_new(
to.clone(),
list.offsets().clone(),
values,
list.nulls().cloned(),
- )))
+ )?))
}
/// Cast the container type of List/Largelist array along with the inner
datatype
@@ -184,10 +184,10 @@ pub(crate) fn cast_list<I: OffsetSizeTrait, O:
OffsetSizeTrait>(
// Safety: valid offsets and checked for overflow
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
- Ok(Arc::new(GenericListArray::<O>::new(
+ Ok(Arc::new(GenericListArray::<O>::try_new(
field.clone(),
offsets,
values,
nulls,
- )))
+ )?))
}
diff --git a/arrow-cast/src/cast/map.rs b/arrow-cast/src/cast/map.rs
index d62a9519b7..e7a9b7495e 100644
--- a/arrow-cast/src/cast/map.rs
+++ b/arrow-cast/src/cast/map.rs
@@ -42,17 +42,17 @@ pub(crate) fn cast_map_values(
let key_array = cast_with_options(from.keys(), key_field.data_type(),
cast_options)?;
let value_array = cast_with_options(from.values(),
value_field.data_type(), cast_options)?;
- Ok(Arc::new(MapArray::new(
+ Ok(Arc::new(MapArray::try_new(
entries_field.clone(),
from.offsets().clone(),
- StructArray::new(
+ StructArray::try_new(
Fields::from(vec![key_field, value_field]),
vec![key_array, value_array],
from.entries().nulls().cloned(),
- ),
+ )?,
from.nulls().cloned(),
to_ordered,
- )))
+ )?))
}
/// Gets the key field from the entries of a map. For all other types returns
None.
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index fc241bea48..72b2de99bd 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -2381,11 +2381,11 @@ fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O:
OffsetSizeTrait>(
let array = array.as_primitive::<FROM>();
let size = std::mem::size_of::<FROM::Native>();
let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size,
array.len()));
- Ok(Arc::new(GenericBinaryArray::<O>::new(
+ Ok(Arc::new(GenericBinaryArray::<O>::try_new(
offsets,
array.values().inner().clone(),
array.nulls().cloned(),
- )))
+ )?))
}
fn adjust_timestamp_to_timezone<T: ArrowTimestampType>(
diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs
index 7f22c4fd64..09a9978ff7 100644
--- a/arrow-cast/src/cast/string.rs
+++ b/arrow-cast/src/cast/string.rs
@@ -115,7 +115,7 @@ fn parse_string_iter<
None => Ok(P::Native::default()),
})
.collect::<Result<Vec<_>, ArrowError>>()?;
- PrimitiveArray::new(v.into(), nulls())
+ PrimitiveArray::try_new(v.into(), nulls())?
};
Ok(Arc::new(array) as ArrayRef)
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 797c224673..21e8d18593 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -120,7 +120,7 @@ where
}
Ok(Arc::new(
- PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
+ PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
.with_data_type(primitive_values.data_type().clone()),
))
}
diff --git a/arrow-row/src/run.rs b/arrow-row/src/run.rs
index ff7c0ffe54..6ed246ce6f 100644
--- a/arrow-row/src/run.rs
+++ b/arrow-row/src/run.rs
@@ -98,7 +98,7 @@ pub unsafe fn decode<R: RunEndIndexType>(
) -> Result<RunArray<R>, ArrowError> {
if rows.is_empty() {
let values = converter.convert_raw(&mut [], validate_utf8)?;
- let run_ends_array =
PrimitiveArray::<R>::new(ScalarBuffer::from(vec![]), None);
+ let run_ends_array =
PrimitiveArray::<R>::try_new(ScalarBuffer::from(vec![]), None)?;
return RunArray::<R>::try_new(&run_ends_array, &values[0]);
}
@@ -149,7 +149,7 @@ pub unsafe fn decode<R: RunEndIndexType>(
};
// Create run ends array
- let run_ends_array =
PrimitiveArray::<R>::new(ScalarBuffer::from(run_ends), None);
+ let run_ends_array =
PrimitiveArray::<R>::try_new(ScalarBuffer::from(run_ends), None)?;
// Create the RunEndEncodedArray
RunArray::<R>::try_new(&run_ends_array, &values[0])
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index bd93650055..d300644792 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -134,7 +134,7 @@ fn concat_dictionaries<K: ArrowDictionaryKeyType>(
NullBuffer::new(nulls.finish())
});
- let keys = PrimitiveArray::<K>::new(key_values.into(), nulls);
+ let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
// Sanity check
assert_eq!(keys.len(), output_len);
diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs
index ff1198cf70..3b3cad257b 100644
--- a/arrow-select/src/dictionary.rs
+++ b/arrow-select/src/dictionary.rs
@@ -75,7 +75,7 @@ pub fn garbage_collect_dictionary<K: ArrowDictionaryKeyType>(
// Create a new values array by filtering using the mask
let values = filter(dictionary.values(), &BooleanArray::new(mask, None))?;
- Ok(DictionaryArray::new(new_keys, values))
+ DictionaryArray::try_new(new_keys, values)
}
/// Equivalent to [`garbage_collect_dictionary`] but without requiring casting
to a specific key type.
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 641599cea6..73877bb88c 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -433,7 +433,7 @@ where
let values = array.values();
let values = filter(&values, &pred)?;
- let run_ends = PrimitiveArray::<R>::new(new_run_ends.into(), None);
+ let run_ends = PrimitiveArray::<R>::try_new(new_run_ends.into(), None)?;
RunArray::try_new(&run_ends, &values)
}
@@ -845,7 +845,10 @@ fn filter_sparse_union(
unreachable!()
};
- let type_ids = filter_primitive(&Int8Array::new(array.type_ids().clone(),
None), predicate);
+ let type_ids = filter_primitive(
+ &Int8Array::try_new(array.type_ids().clone(), None)?,
+ predicate,
+ );
let children = fields
.iter()
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index ba2a032d3a..10f903d5e8 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -157,7 +157,7 @@ fn interleave_primitive<T: ArrowPrimitiveType>(
.map(|(a, b)| interleaved.arrays[*a].value(*b))
.collect::<Vec<_>>();
- let array = PrimitiveArray::<T>::new(values.into(), interleaved.nulls);
+ let array = PrimitiveArray::<T>::try_new(values.into(),
interleaved.nulls)?;
Ok(Arc::new(array.with_data_type(data_type.clone())))
}
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 7680b82d4c..5bb966c678 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -314,8 +314,8 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::Union(fields, UnionMode::Dense) => {
let values = values.as_any().downcast_ref::<UnionArray>().unwrap();
- let type_ids =
<PrimitiveArray<Int8Type>>::new(take_native(values.type_ids(), indices), None);
- let offsets =
<PrimitiveArray<Int32Type>>::new(take_native(values.offsets().unwrap(),
indices), None);
+ let type_ids =
<PrimitiveArray<Int8Type>>::try_new(take_native(values.type_ids(), indices),
None)?;
+ let offsets =
<PrimitiveArray<Int32Type>>::try_new(take_native(values.offsets().unwrap(),
indices), None)?;
let children = fields.iter()
.map(|(field_type_id, _)| {
@@ -387,7 +387,7 @@ where
{
let values_buf = take_native(values.values(), indices);
let nulls = take_nulls(values.nulls(), indices);
- Ok(PrimitiveArray::new(values_buf,
nulls).with_data_type(values.data_type().clone()))
+ Ok(PrimitiveArray::try_new(values_buf,
nulls)?.with_data_type(values.data_type().clone()))
}
#[inline(never)]
diff --git a/arrow-select/src/union_extract.rs
b/arrow-select/src/union_extract.rs
index 62d660b804..b07ea32f4d 100644
--- a/arrow-select/src/union_extract.rs
+++ b/arrow-select/src/union_extract.rs
@@ -257,7 +257,7 @@ fn extract_dense(
//case 6: some type_ids matches our target, but not all. For
selected values, take the value pointed by the offset. For unselected, use a
valid null
Ok(take(
target,
- &Int32Array::new(offsets.clone(), Some(selected.into())),
+ &Int32Array::try_new(offsets.clone(),
Some(selected.into()))?,
None,
)?)
}
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 49fc244e72..b0f6eb0632 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -78,10 +78,10 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef,
ArrowError> {
DataType::Utf8View => {
let list = array.as_string_view();
let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
- Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+ Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
v.into(),
list.nulls().cloned(),
- )))
+ )?))
}
DataType::Binary => {
let list = array.as_binary::<i32>();
@@ -92,15 +92,15 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef,
ArrowError> {
Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
}
DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) =>
Ok(Arc::new(
- Int32Array::new(vec![*len; array.len()].into(),
array.nulls().cloned()),
+ Int32Array::try_new(vec![*len; array.len()].into(),
array.nulls().cloned())?,
)),
DataType::BinaryView => {
let list = array.as_binary_view();
let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
- Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+ Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
v.into(),
list.nulls().cloned(),
- )))
+ )?))
}
other => Err(ArrowError::ComputeError(format!(
"length not supported for {other:?}"
@@ -144,7 +144,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef,
ArrowError> {
.iter()
.map(|view| (*view as i32).wrapping_mul(8))
.collect();
- Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
+ Ok(Arc::new(Int32Array::try_new(
+ values,
+ array.nulls().cloned(),
+ )?))
}
DataType::Binary => {
let list = array.as_binary::<i32>();
@@ -154,10 +157,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef,
ArrowError> {
let list = array.as_binary::<i64>();
Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
}
- DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new(
+ DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
vec![*len * 8; array.len()].into(),
array.nulls().cloned(),
- ))),
+ )?)),
other => Err(ArrowError::ComputeError(format!(
"bit_length not supported for {other:?}"
))),
diff --git a/parquet-variant-compute/src/to_json.rs
b/parquet-variant-compute/src/to_json.rs
index 1d6f51ca24..fdb32d883a 100644
--- a/parquet-variant-compute/src/to_json.rs
+++ b/parquet-variant-compute/src/to_json.rs
@@ -95,11 +95,7 @@ pub fn variant_to_json(input: &ArrayRef) ->
Result<StringArray, ArrowError> {
let value_buffer = Buffer::from_vec(json_buffer);
let null_buffer = NullBuffer::new(validity.finish());
- Ok(StringArray::new(
- offsets_buffer,
- value_buffer,
- Some(null_buffer),
- ))
+ StringArray::try_new(offsets_buffer, value_buffer, Some(null_buffer))
}
#[cfg(test)]