This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch 57_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/57_maintenance by this push:
new 6bbfb9932b [maintenance_57] Fix string array equality when the values
buffer is the same and only the offsets to access it differ (#9325) (#9330)
6bbfb9932b is described below
commit 6bbfb9932b5268c600bcf3dc201e79acd20a551e
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 2 11:26:25 2026 -0500
[maintenance_57] Fix string array equality when the values buffer is the
same and only the offsets to access it differ (#9325) (#9330)
- Part of https://github.com/apache/arrow-rs/issues/9240
- Related to https://github.com/apache/arrow-rs/issues/9323
This is a backport of the following PR to the 57 line
- https://github.com/apache/arrow-rs/pull/9325 from @jhorstmann
Co-authored-by: Jörn Horstmann <[email protected]>
---
arrow-array/src/array/string_array.rs | 8 +++++
arrow-data/src/equal/list.rs | 2 +-
arrow-data/src/equal/variable_size.rs | 57 ++++++++++++++++++++++++++++-------
3 files changed, 55 insertions(+), 12 deletions(-)
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index 80f3153ece..9cd4a9dc15 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -551,4 +551,12 @@ mod tests {
let err_return = array.into_builder().unwrap_err();
assert_eq!(&err_return, &shared_array);
}
+
+ #[test]
+ fn test_non_null_string_array_equal() {
+ let a = StringArray::from(vec![Some("ab"), Some("c")]);
+ let b = StringArray::from(vec![Some("a"), Some("bc")]);
+
+ assert_ne!(a, b);
+ }
}
diff --git a/arrow-data/src/equal/list.rs b/arrow-data/src/equal/list.rs
index ba5e5a8c93..13214ab6f0 100644
--- a/arrow-data/src/equal/list.rs
+++ b/arrow-data/src/equal/list.rs
@@ -21,7 +21,7 @@ use num_integer::Integer;
use super::equal_range;
-fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
+pub(super) fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
// invariant from `base_equal`
debug_assert_eq!(lhs.len(), rhs.len());
diff --git a/arrow-data/src/equal/variable_size.rs b/arrow-data/src/equal/variable_size.rs
index c83a39ebd8..10aeafd7e9 100644
--- a/arrow-data/src/equal/variable_size.rs
+++ b/arrow-data/src/equal/variable_size.rs
@@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+use super::utils::equal_len;
use crate::data::{ArrayData, contains_nulls};
+use crate::equal::list::lengths_equal;
use arrow_buffer::ArrowNativeType;
use num_integer::Integer;
-use super::utils::equal_len;
-
fn offset_value_equal<T: ArrowNativeType + Integer>(
lhs_values: &[u8],
rhs_values: &[u8],
@@ -63,15 +63,18 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
// Only checking one null mask here because by the time the control flow reaches
// this point, the equality of the two masks would have already been verified.
if !contains_nulls(lhs.nulls(), lhs_start, len) {
- offset_value_equal(
- lhs_values,
- rhs_values,
- lhs_offsets,
- rhs_offsets,
- lhs_start,
- rhs_start,
- len,
- )
+ let lhs_offsets_slice = &lhs_offsets[lhs_start..lhs_start + len + 1];
+ let rhs_offsets_slice = &rhs_offsets[rhs_start..rhs_start + len + 1];
+ lengths_equal(lhs_offsets_slice, rhs_offsets_slice)
+ && offset_value_equal(
+ lhs_values,
+ rhs_values,
+ lhs_offsets,
+ rhs_offsets,
+ lhs_start,
+ rhs_start,
+ len,
+ )
} else {
(0..len).all(|i| {
let lhs_pos = lhs_start + i;
@@ -95,3 +98,35 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
})
}
}
+
+#[cfg(test)]
+mod tests {
+ use crate::ArrayData;
+ use crate::equal::variable_size::variable_sized_equal;
+ use arrow_buffer::Buffer;
+ use arrow_schema::DataType;
+
+ #[test]
+ fn test_variable_sized_equal_diff_offsets() {
+ let a = ArrayData::builder(DataType::Utf8)
+ .buffers(vec![
+ Buffer::from_vec(vec![0_i32, 3, 6]),
+ Buffer::from_slice_ref(b"foobar"),
+ ])
+ .null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
+ .len(2)
+ .build()
+ .unwrap();
+ let b = ArrayData::builder(DataType::Utf8)
+ .buffers(vec![
+ Buffer::from_vec(vec![0_i32, 2, 6]),
+ Buffer::from_slice_ref(b"foobar"),
+ ])
+ .null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
+ .len(2)
+ .build()
+ .unwrap();
+
+ assert!(!variable_sized_equal::<i32>(&a, &b, 0, 0, 2));
+ }
+}