This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 9293c895b6 Take fsb null indices (#8981)
9293c895b6 is described below
commit 9293c895b6df6755f711dd1db4362d5d936d8e47
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Dec 11 14:08:59 2025 -0500
Take fsb null indices (#8981)
# Which issue does this PR close?
- redo of https://github.com/apache/arrow-rs/pull/8948 from @tobixdev
- closes https://github.com/apache/arrow-rs/issues/8947
# Rationale for this change
I messed up @tobixdev 's PR / branch by accident in
https://github.com/apache/arrow-rs/pull/8948
This PR contains the previous commits that were in that PR
# What changes are included in this PR?
- See https://github.com/apache/arrow-rs/pull/8948
# Are these changes tested?
Yes, by CI
# Are there any user-facing changes?
Less bugs, faster code
---------
Co-authored-by: Tobias Schwarzinger <[email protected]>
---
arrow-select/src/take.rs | 94 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 73 insertions(+), 21 deletions(-)
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index fb3b559945..edf50a6a2f 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -384,13 +384,6 @@ pub struct TakeOptions {
pub check_bounds: bool,
}
-#[inline(always)]
-fn maybe_usize<I: ArrowNativeType>(index: I) -> Result<usize, ArrowError> {
- index
- .to_usize()
- .ok_or_else(|| ArrowError::ComputeError("Cast to usize
failed".to_string()))
-}
-
/// `take` implementation for all primitive arrays
///
/// This checks if an `indices` slot is populated, and gets the value from
`values`
@@ -713,27 +706,60 @@ fn take_fixed_size_list<IndexType: ArrowPrimitiveType>(
Ok(FixedSizeListArray::from(list_data))
}
+/// The take kernel implementation for `FixedSizeBinaryArray`.
+///
+/// The computation is done in two steps:
+/// - Compute the values buffer
+/// - Compute the null buffer
fn take_fixed_size_binary<IndexType: ArrowPrimitiveType>(
values: &FixedSizeBinaryArray,
indices: &PrimitiveArray<IndexType>,
size: i32,
) -> Result<FixedSizeBinaryArray, ArrowError> {
- let nulls = values.nulls();
- let array_iter = indices
- .values()
- .iter()
- .map(|idx| {
- let idx = maybe_usize::<IndexType::Native>(*idx)?;
- if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) {
- Ok(Some(values.value(idx)))
- } else {
- Ok(None)
+ let size_usize = usize::try_from(size).map_err(|_| {
+ ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to
usize", size))
+ })?;
+
+ let values_buffer = values.values().as_slice();
+ let mut values_buffer_builder = BufferBuilder::new(indices.len() *
size_usize);
+
+ if indices.null_count() == 0 {
+ let array_iter = indices.values().iter().map(|idx| {
+ let offset = idx.as_usize() * size_usize;
+ &values_buffer[offset..offset + size_usize]
+ });
+ for slice in array_iter {
+ values_buffer_builder.append_slice(slice);
+ }
+ } else {
+ // The indices nullability cannot be ignored here because the values
buffer may contain
+ // nulls which should not cause a panic.
+ let array_iter = indices.iter().map(|idx| {
+ idx.map(|idx| {
+ let offset = idx.as_usize() * size_usize;
+ &values_buffer[offset..offset + size_usize]
+ })
+ });
+ for slice in array_iter {
+ match slice {
+ None => values_buffer_builder.append_n(size_usize, 0),
+ Some(slice) => values_buffer_builder.append_slice(slice),
}
- })
- .collect::<Result<Vec<_>, ArrowError>>()?
- .into_iter();
+ }
+ }
- FixedSizeBinaryArray::try_from_sparse_iter_with_size(array_iter, size)
+ let values_buffer = values_buffer_builder.finish();
+ let value_nulls = take_nulls(values.nulls(), indices);
+ let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());
+
+ let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size))
+ .len(indices.len())
+ .nulls(final_nulls)
+ .offset(0)
+ .add_buffer(values_buffer)
+ .build()?;
+
+ Ok(FixedSizeBinaryArray::from(array_data))
}
/// `take` implementation for dictionary arrays
@@ -2096,6 +2122,32 @@ mod tests {
);
}
+ #[test]
+ fn test_take_fixed_size_binary_with_nulls_indices() {
+ let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+ [
+ Some(vec![0x01, 0x01, 0x01, 0x01]),
+ Some(vec![0x02, 0x02, 0x02, 0x02]),
+ Some(vec![0x03, 0x03, 0x03, 0x03]),
+ Some(vec![0x04, 0x04, 0x04, 0x04]),
+ ]
+ .into_iter(),
+ 4,
+ )
+ .unwrap();
+
+ // The two middle indices are null -> Should be null in the output.
+ let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]);
+
+ let result = take_fixed_size_binary(&fsb, &indices, 4).unwrap();
+ assert_eq!(result.len(), 4);
+ assert_eq!(result.null_count(), 2);
+ assert_eq!(
+ result.nulls().unwrap().iter().collect::<Vec<_>>(),
+ vec![true, false, false, true]
+ );
+ }
+
#[test]
#[should_panic(expected = "index out of bounds: the len is 4 but the index
is 1000")]
fn test_take_list_out_of_bounds() {