This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch 57_maintenance
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/57_maintenance by this push:
     new 6bbfb9932b [maintenance_57] Fix string array equality when the values buffer is the same and only the offsets to access it differ (#9325) (#9330)
6bbfb9932b is described below

commit 6bbfb9932b5268c600bcf3dc201e79acd20a551e
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 2 11:26:25 2026 -0500

    [maintenance_57] Fix string array equality when the values buffer is the same and only the offsets to access it differ (#9325) (#9330)
    
    - Part of https://github.com/apache/arrow-rs/issues/9240
    - Related to https://github.com/apache/arrow-rs/issues/9323
    
    This is a backport of the following PR to the 57 line:
    - https://github.com/apache/arrow-rs/pull/9325 from @jhorstmann
    
    Co-authored-by: Jörn Horstmann <[email protected]>
---
 arrow-array/src/array/string_array.rs |  8 +++++
 arrow-data/src/equal/list.rs          |  2 +-
 arrow-data/src/equal/variable_size.rs | 57 ++++++++++++++++++++++++++++-------
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index 80f3153ece..9cd4a9dc15 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -551,4 +551,12 @@ mod tests {
         let err_return = array.into_builder().unwrap_err();
         assert_eq!(&err_return, &shared_array);
     }
+
+    #[test]
+    fn test_non_null_string_array_equal() {
+        let a = StringArray::from(vec![Some("ab"), Some("c")]);
+        let b = StringArray::from(vec![Some("a"), Some("bc")]);
+
+        assert_ne!(a, b);
+    }
 }
diff --git a/arrow-data/src/equal/list.rs b/arrow-data/src/equal/list.rs
index ba5e5a8c93..13214ab6f0 100644
--- a/arrow-data/src/equal/list.rs
+++ b/arrow-data/src/equal/list.rs
@@ -21,7 +21,7 @@ use num_integer::Integer;
 
 use super::equal_range;
 
-fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
+pub(super) fn lengths_equal<T: ArrowNativeType + Integer>(lhs: &[T], rhs: &[T]) -> bool {
     // invariant from `base_equal`
     debug_assert_eq!(lhs.len(), rhs.len());
 
diff --git a/arrow-data/src/equal/variable_size.rs b/arrow-data/src/equal/variable_size.rs
index c83a39ebd8..10aeafd7e9 100644
--- a/arrow-data/src/equal/variable_size.rs
+++ b/arrow-data/src/equal/variable_size.rs
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use super::utils::equal_len;
 use crate::data::{ArrayData, contains_nulls};
+use crate::equal::list::lengths_equal;
 use arrow_buffer::ArrowNativeType;
 use num_integer::Integer;
 
-use super::utils::equal_len;
-
 fn offset_value_equal<T: ArrowNativeType + Integer>(
     lhs_values: &[u8],
     rhs_values: &[u8],
@@ -63,15 +63,18 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
     // Only checking one null mask here because by the time the control flow reaches
     // this point, the equality of the two masks would have already been verified.
     if !contains_nulls(lhs.nulls(), lhs_start, len) {
-        offset_value_equal(
-            lhs_values,
-            rhs_values,
-            lhs_offsets,
-            rhs_offsets,
-            lhs_start,
-            rhs_start,
-            len,
-        )
+        let lhs_offsets_slice = &lhs_offsets[lhs_start..lhs_start + len + 1];
+        let rhs_offsets_slice = &rhs_offsets[rhs_start..rhs_start + len + 1];
+        lengths_equal(lhs_offsets_slice, rhs_offsets_slice)
+            && offset_value_equal(
+                lhs_values,
+                rhs_values,
+                lhs_offsets,
+                rhs_offsets,
+                lhs_start,
+                rhs_start,
+                len,
+            )
     } else {
         (0..len).all(|i| {
             let lhs_pos = lhs_start + i;
@@ -95,3 +98,35 @@ pub(super) fn variable_sized_equal<T: ArrowNativeType + Integer>(
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::ArrayData;
+    use crate::equal::variable_size::variable_sized_equal;
+    use arrow_buffer::Buffer;
+    use arrow_schema::DataType;
+
+    #[test]
+    fn test_variable_sized_equal_diff_offsets() {
+        let a = ArrayData::builder(DataType::Utf8)
+            .buffers(vec![
+                Buffer::from_vec(vec![0_i32, 3, 6]),
+                Buffer::from_slice_ref(b"foobar"),
+            ])
+            .null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
+            .len(2)
+            .build()
+            .unwrap();
+        let b = ArrayData::builder(DataType::Utf8)
+            .buffers(vec![
+                Buffer::from_vec(vec![0_i32, 2, 6]),
+                Buffer::from_slice_ref(b"foobar"),
+            ])
+            .null_bit_buffer(Some(Buffer::from_slice_ref([0b01_u8])))
+            .len(2)
+            .build()
+            .unwrap();
+
+        assert!(!variable_sized_equal::<i32>(&a, &b, 0, 0, 2));
+    }
+}

Reply via email to