This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new cd6053649c Add support for using ListView arrays and types through FFI 
(#8822)
cd6053649c is described below

commit cd6053649c00200c2a8b826d1f94b62c9ab3cd06
Author: Adam Gutglick <[email protected]>
AuthorDate: Thu Dec 4 10:48:20 2025 -0500

    Add support for using ListView arrays and types through FFI (#8822)
    
    # Which issue does this PR close?
    
    - Closes #8819
    
    # Rationale for this change
    
    Adds support for using list view arrays through FFI
    
    # What changes are included in this PR?
    
    Support for the ListView types and arrays
    
    # Are these changes tested?
    
    I've added additional tests that verify round trips through FFI, and a
    pyarrow integration test.
    
    # Are there any user-facing changes?
    
    Extended support for ListView; doesn't affect existing functionality.
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-array/src/ffi.rs                             | 146 ++++++++++++++++++++-
 arrow-data/src/data.rs                             |   2 +-
 .../tests/test_sql.py                              |  37 ++++--
 arrow-schema/src/ffi.rs                            |  22 ++++
 4 files changed, 194 insertions(+), 13 deletions(-)

diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs
index 542b61d892..f50dd3420b 100644
--- a/arrow-array/src/ffi.rs
+++ b/arrow-array/src/ffi.rs
@@ -140,8 +140,8 @@ pub unsafe fn export_array_into_raw(
     Ok(())
 }
 
-// returns the number of bits that buffer `i` (in the C data interface) is 
expected to have.
-// This is set by the Arrow specification
+/// returns the number of bits that buffer `i` (in the C data interface) is 
expected to have.
+/// This is set by the Arrow specification
 fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
     if let Some(primitive) = data_type.primitive_width() {
         return match i {
@@ -180,6 +180,10 @@ fn bit_width(data_type: &DataType, i: usize) -> 
Result<usize> {
         | (DataType::List(_), 1)
         | (DataType::Map(_, _), 1) => i32::BITS as _,
         (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
+        // List views have two i32 buffers, offsets and sizes
+        (DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS 
as _,
+        // Large list views have two i64 buffers, offsets and sizes
+        (DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => 
i64::BITS as _,
         (DataType::List(_), _) | (DataType::Map(_, _), _) => {
             return Err(ArrowError::CDataInterface(format!(
                 "The datatype \"{data_type}\" expects 2 buffers, but requested 
{i}. Please verify that the C data interface is correctly implemented."
@@ -351,6 +355,8 @@ impl ImportedArrowArray<'_> {
             DataType::List(field)
             | DataType::FixedSizeList(field, _)
             | DataType::LargeList(field)
+            | DataType::ListView(field)
+            | DataType::LargeListView(field)
             | DataType::Map(field, _) => Ok([self.consume_child(0, 
field.data_type())?].to_vec()),
             DataType::Struct(fields) => {
                 assert!(fields.len() == self.array.num_children());
@@ -471,6 +477,14 @@ impl ImportedArrowArray<'_> {
                 debug_assert_eq!(bits % 8, 0);
                 (length + 1) * (bits / 8)
             }
+            (DataType::ListView(_), 1)
+            | (DataType::ListView(_), 2)
+            | (DataType::LargeListView(_), 1)
+            | (DataType::LargeListView(_), 2) => {
+                let bits = bit_width(data_type, i)?;
+                debug_assert_eq!(bits % 8, 0);
+                length * (bits / 8)
+            }
             (DataType::Utf8, 2) | (DataType::Binary, 2) => {
                 if self.array.is_empty() {
                     return Ok(0);
@@ -553,7 +567,7 @@ mod tests_to_then_from_ffi {
     use std::collections::HashMap;
     use std::mem::ManuallyDrop;
 
-    use arrow_buffer::NullBuffer;
+    use arrow_buffer::{ArrowNativeType, NullBuffer};
     use arrow_schema::Field;
 
     use crate::builder::UnionBuilder;
@@ -783,6 +797,71 @@ mod tests_to_then_from_ffi {
         test_generic_list::<i64>()
     }
 
+    fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> 
Result<()> {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct a buffer for value offsets, for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
+        let value_offsets = [0_usize, 3, 6]
+            .iter()
+            .map(|i| Offset::from_usize(*i).unwrap())
+            .collect::<Buffer>();
+
+        let sizes_buffer = [3_usize, 3, 2]
+            .iter()
+            .map(|i| Offset::from_usize(*i).unwrap())
+            .collect::<Buffer>();
+
+        // Construct a list view data type, then the array data from the offsets and sizes buffers above
+        let list_view_dt = 
GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
+            Field::new_list_field(DataType::Int16, false),
+        ));
+
+        let list_data = ArrayData::builder(list_view_dt)
+            .len(3)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+
+        let original = GenericListViewArray::<Offset>::from(list_data.clone());
+
+        // export it
+        let (array, schema) = to_ffi(&original.to_data())?;
+
+        // (simulate consumer) import it
+        let data = unsafe { from_ffi(array, &schema) }?;
+        let array = make_array(data);
+
+        // downcast
+        let array = array
+            .as_any()
+            .downcast_ref::<GenericListViewArray<Offset>>()
+            .unwrap();
+
+        assert_eq!(&array.value(0), &original.value(0));
+        assert_eq!(&array.value(1), &original.value(1));
+        assert_eq!(&array.value(2), &original.value(2));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_list_view() -> Result<()> {
+        test_generic_list_view::<i32>()
+    }
+
+    #[test]
+    fn test_large_list_view() -> Result<()> {
+        test_generic_list_view::<i64>()
+    }
+
     fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
         // create an array natively
         let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
@@ -1315,6 +1394,7 @@ mod tests_from_ffi {
     use std::ptr::NonNull;
     use std::sync::Arc;
 
+    use arrow_buffer::NullBuffer;
     #[cfg(not(feature = "force_validate"))]
     use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
     #[cfg(feature = "force_validate")]
@@ -1325,6 +1405,7 @@ mod tests_from_ffi {
     use arrow_schema::{DataType, Field};
 
     use super::Result;
+
     use crate::builder::GenericByteViewBuilder;
     use crate::types::{BinaryViewType, ByteViewType, Int32Type, 
StringViewType};
     use crate::{
@@ -1528,6 +1609,65 @@ mod tests_from_ffi {
         test_round_trip(&data)
     }
 
+    #[test]
+    fn test_list_view() -> Result<()> {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct a buffer for value offsets, for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
+        let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
+        let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);
+
+        // Construct a list view data type, then the array data from the offsets and sizes buffers above
+        let list_view_dt =
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, 
false)));
+
+        let list_view_data = ArrayData::builder(list_view_dt)
+            .len(3)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+
+        test_round_trip(&list_view_data)
+    }
+
+    #[test]
+    fn test_list_view_with_nulls() -> Result<()> {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct a buffer for value offsets, for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7], null]
+        let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
+        let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);
+
+        // Construct a list view data type, then the array data from the offsets and sizes buffers above
+        let list_view_dt =
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, 
true)));
+
+        let list_view_data = ArrayData::builder(list_view_dt)
+            .len(4)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .nulls(Some(NullBuffer::from(vec![true, true, true, false])))
+            .build()
+            .unwrap();
+
+        test_round_trip(&list_view_data)
+    }
+
     #[test]
     #[cfg(not(feature = "force_validate"))]
     fn test_empty_string_with_non_zero_offset() -> Result<()> {
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 329c12bbf4..df3b2c578f 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -1793,7 +1793,7 @@ impl DataTypeLayout {
                 },
             ],
             can_contain_null_mask: true,
-            variadic: true,
+            variadic: false,
         }
     }
 }
diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py 
b/arrow-pyarrow-integration-testing/tests/test_sql.py
index 3b46d5729a..f5d53155fe 100644
--- a/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ b/arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -27,7 +27,9 @@ import pytz
 
 import arrow_pyarrow_integration_testing as rust
 
-PYARROW_PRE_14 = int(pa.__version__.split('.')[0]) < 14
+PYARROW_MAJOR_VER = int(pa.__version__.split(".")[0])
+PYARROW_PRE_14 = PYARROW_MAJOR_VER < 14
+PYARROW_PRE_16 = PYARROW_MAJOR_VER < 16
 
 
 @contextlib.contextmanager
@@ -112,8 +114,16 @@ _supported_pyarrow_types = [
     ),
 ]
 
-_unsupported_pyarrow_types = [
-]
+if PYARROW_MAJOR_VER >= 16:
+    _supported_pyarrow_types.extend(
+        [
+            pa.list_view(pa.uint64()),
+            pa.large_list_view(pa.uint64()),
+            pa.list_view(pa.string()),
+            pa.large_list_view(pa.string()),
+        ]
+    )
+
 
 # As of pyarrow 14, pyarrow implements the Arrow PyCapsule interface
 # 
(https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
@@ -158,12 +168,6 @@ def test_type_roundtrip_pycapsule(pyarrow_type):
     assert restored == pyarrow_type
     assert restored is not pyarrow_type
 
-
[email protected]("pyarrow_type", _unsupported_pyarrow_types, ids=str)
-def test_type_roundtrip_raises(pyarrow_type):
-    with pytest.raises(pa.ArrowException):
-        rust.round_trip_type(pyarrow_type)
-
 @pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str)
 def test_field_roundtrip(pyarrow_type):
     pyarrow_field = pa.field("test", pyarrow_type, nullable=True)
@@ -337,6 +341,21 @@ def test_list_array():
     del a
     del b
 
+
[email protected](PYARROW_PRE_16, reason="requires pyarrow 16")
+def test_list_view_array():
+    """
+    Python -> Rust -> Python
+    """
+    a = pa.array([[], None, [1, 2], [4, 5, 6]], pa.list_view(pa.int64()))
+    b = rust.round_trip_array(a)
+    b.validate(full=True)
+    assert a.to_pylist() == b.to_pylist()
+    assert a.type == b.type
+    del a
+    del b
+
+
 def test_map_array():
     """
     Python -> Rust -> Python
diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs
index 2f890508e0..a1a32224a4 100644
--- a/arrow-schema/src/ffi.rs
+++ b/arrow-schema/src/ffi.rs
@@ -456,6 +456,14 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
                 let c_child = c_schema.child(0);
                 DataType::LargeList(Arc::new(Field::try_from(c_child)?))
             }
+            "+vl" => {
+                let c_child = c_schema.child(0);
+                DataType::ListView(Arc::new(Field::try_from(c_child)?))
+            }
+            "+vL" => {
+                let c_child = c_schema.child(0);
+                DataType::LargeListView(Arc::new(Field::try_from(c_child)?))
+            }
             "+s" => {
                 let fields = c_schema.children().map(Field::try_from);
                 DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
@@ -657,6 +665,8 @@ impl TryFrom<&DataType> for FFI_ArrowSchema {
         let children = match dtype {
             DataType::List(child)
             | DataType::LargeList(child)
+            | DataType::ListView(child)
+            | DataType::LargeListView(child)
             | DataType::FixedSizeList(child, _)
             | DataType::Map(child, _) => {
                 vec![FFI_ArrowSchema::try_from(child.as_ref())?]
@@ -746,6 +756,8 @@ fn get_format_string(dtype: &DataType) -> 
Result<Cow<'static, str>, ArrowError>
         DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()),
         DataType::List(_) => Ok("+l".into()),
         DataType::LargeList(_) => Ok("+L".into()),
+        DataType::ListView(_) => Ok("+vl".into()),
+        DataType::LargeListView(_) => Ok("+vL".into()),
         DataType::Struct(_) => Ok("+s".into()),
         DataType::Map(_, _) => Ok("+m".into()),
         DataType::RunEndEncoded(_, _) => Ok("+r".into()),
@@ -874,6 +886,16 @@ mod tests {
             DataType::Int16,
             false,
         ))));
+        round_trip_type(DataType::ListView(Arc::new(Field::new(
+            "a",
+            DataType::Int16,
+            false,
+        ))));
+        round_trip_type(DataType::LargeListView(Arc::new(Field::new(
+            "a",
+            DataType::Int16,
+            false,
+        ))));
         round_trip_type(DataType::Struct(Fields::from(vec![Field::new(
             "a",
             DataType::Utf8,

Reply via email to