This is an automated email from the ASF dual-hosted git repository.

scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2bf6909305 Add list-like types support to VariantArray::try_new (#9457)
2bf6909305 is described below

commit 2bf6909305091c69edddb0f16c76184edd206141
Author: Konstantin Tarasov <[email protected]>
AuthorDate: Wed Feb 25 16:56:53 2026 -0500

    Add list-like types support to VariantArray::try_new (#9457)
    
    # Which issue does this PR close?
    
    - Closes #9455.
    
    # Rationale for this change
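    See issue #9455: `VariantArray::try_new` previously rejected `LargeList`, `ListView`, and `LargeListView` typed values.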
    
    # What changes are included in this PR?
    
    Added support for the list-like types `LargeList`, `ListView`, and `LargeListView` to the data type checking in `VariantArray`.
    
    # Are these changes tested?
    
    # Are there any user-facing changes?
---
 parquet-variant-compute/src/variant_array.rs | 110 +++++++++++++++++++++++++--
 1 file changed, 102 insertions(+), 8 deletions(-)

diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
index 250852d021..145de5edfb 100644
--- a/parquet-variant-compute/src/variant_array.rs
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -1181,16 +1181,23 @@ fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result<Cow<'_, Dat
         FixedSizeBinary(16) => borrow!(),
         FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
 
-        // We can _possibly_ allow (some of) these some day?
-        ListView(_) | LargeList(_) | LargeListView(_) => {
-            fail!()
-        }
-
-        // Lists and struct are allowed, maps and unions are not
+        // List-like containers and struct are allowed, maps and unions are not
         List(field) => match canonicalize_and_verify_field(field)? {
             Cow::Borrowed(_) => borrow!(),
             Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
         },
+        LargeList(field) => match canonicalize_and_verify_field(field)? {
+            Cow::Borrowed(_) => borrow!(),
+            Cow::Owned(new_field) => Cow::Owned(DataType::LargeList(new_field)),
+        },
+        ListView(field) => match canonicalize_and_verify_field(field)? {
+            Cow::Borrowed(_) => borrow!(),
+            Cow::Owned(new_field) => Cow::Owned(DataType::ListView(new_field)),
+        },
+        LargeListView(field) => match canonicalize_and_verify_field(field)? {
+            Cow::Borrowed(_) => borrow!(),
+            Cow::Owned(new_field) => Cow::Owned(DataType::LargeListView(new_field)),
+        },
         // Struct is used by the internal layout, and can also represent a shredded variant object.
         Struct(fields) => {
             // Avoid allocation unless at least one field changes, to avoid unnecessary deep cloning
@@ -1235,9 +1242,10 @@ mod test {
 
     use super::*;
     use arrow::array::{
-        BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array,
-        Time64MicrosecondArray,
+        BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, Int64Array,
+        LargeListArray, LargeListViewArray, ListArray, ListViewArray, Time64MicrosecondArray,
     };
+    use arrow::buffer::{OffsetBuffer, ScalarBuffer};
     use arrow_schema::{Field, Fields};
     use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString};
 
@@ -1335,6 +1343,17 @@ mod test {
         Arc::new(Int32Array::from(vec![1]))
     }
 
+    fn make_variant_struct_with_typed_value(typed_value: ArrayRef) -> StructArray {
+        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
+            EMPTY_VARIANT_METADATA_BYTES,
+            typed_value.len(),
+        ));
+        StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata), false)
+            .with_field("typed_value", typed_value, true)
+            .build()
+    }
+
     #[test]
     fn all_null_shredding_state() {
         // Verify the shredding state is AllNull
@@ -1420,6 +1439,81 @@ mod test {
         ));
     }
 
+    #[test]
+    fn canonicalize_and_verify_list_like_data_types() {
+        // `parquet/tests/variant_integration.rs` validates Parquet shredded-variant fixtures that
+        // use Parquet LIST encoding, but those fixtures do not cover Arrow-specific list container
+        // variants (`LargeList`, `ListView`, `LargeListView`) accepted by `VariantArray::try_new`.
+        let make_item_binary = || Arc::new(Field::new("item", DataType::Binary, true));
+        let make_item_binary_view = || Arc::new(Field::new("item", DataType::BinaryView, true));
+
+        let cases = vec![
+            (
+                DataType::LargeList(make_item_binary()),
+                DataType::LargeList(make_item_binary_view()),
+            ),
+            (
+                DataType::ListView(make_item_binary()),
+                DataType::ListView(make_item_binary_view()),
+            ),
+            (
+                DataType::LargeListView(make_item_binary()),
+                DataType::LargeListView(make_item_binary_view()),
+            ),
+        ];
+
+        for (input, expected) in cases {
+            assert_eq!(
+                canonicalize_and_verify_data_type(&input).unwrap().as_ref(),
+                &expected
+            );
+        }
+    }
+
+    #[test]
+    fn variant_array_try_new_supports_list_like_typed_value() {
+        let item_field = Arc::new(Field::new("item", DataType::Int64, true));
+        let values: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
+
+        let typed_values = vec![
+            Arc::new(ListArray::new(
+                item_field.clone(),
+                OffsetBuffer::new(ScalarBuffer::from(vec![0, 2, 3])),
+                values.clone(),
+                None,
+            )) as ArrayRef,
+            Arc::new(LargeListArray::new(
+                item_field.clone(),
+                OffsetBuffer::new(ScalarBuffer::from(vec![0_i64, 2, 3])),
+                values.clone(),
+                None,
+            )) as ArrayRef,
+            Arc::new(ListViewArray::new(
+                item_field.clone(),
+                ScalarBuffer::from(vec![0, 2]),
+                ScalarBuffer::from(vec![2, 1]),
+                values.clone(),
+                None,
+            )) as ArrayRef,
+            Arc::new(LargeListViewArray::new(
+                item_field,
+                ScalarBuffer::from(vec![0_i64, 2]),
+                ScalarBuffer::from(vec![2_i64, 1]),
+                values,
+                None,
+            )) as ArrayRef,
+        ];
+
+        for typed_value in typed_values {
+            let input = make_variant_struct_with_typed_value(typed_value.clone());
+            let variant_array = VariantArray::try_new(&input).unwrap();
+            assert_eq!(
+                variant_array.typed_value_field().unwrap().data_type(),
+                typed_value.data_type(),
+            );
+        }
+    }
+
     #[test]
     fn test_variant_array_iterable() {
         let mut b = VariantArrayBuilder::new(6);

Reply via email to