This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d7dae2c7c2 [ARROW-RS-7820][Variant] Add tests for large variant lists 
(#7876)
d7dae2c7c2 is described below

commit d7dae2c7c2ccde7d21597ece2888bf445c467d60
Author: Congxian Qiu <[email protected]>
AuthorDate: Fri Jul 11 01:33:05 2025 +0800

    [ARROW-RS-7820][Variant] Add tests for large variant lists (#7876)
    
    # Which issue does this PR close?
    
    Add tests for large variant list.
    
    - Closes #7820.
    
    # Rationale for this change
    
    Add tests for large variant lists.
    
    # What changes are included in this PR?
    
    This PR adds three tests for large variant lists:
    - one for total child size between 2^8 and 2^16
    - one for total child size between 2^16 and 2^24
    - one for total child size between 2^24 and 2^32
    
    All the tests verify the `is_large`, `offset_size` and the content
    of the list.
    
    # Are these changes tested?
    
    Yes
    
    # Are there any user-facing changes?
    
    None
---
 parquet-variant/src/variant/list.rs | 195 ++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)

diff --git a/parquet-variant/src/variant/list.rs 
b/parquet-variant/src/variant/list.rs
index 5257ec6a02..05ddf9b2b7 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -286,6 +286,8 @@ impl<'m, 'v> VariantList<'m, 'v> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::VariantBuilder;
+    use std::iter::repeat_n;
 
     #[test]
     fn test_variant_list_simple() {
@@ -413,4 +415,197 @@ mod tests {
         let elem1 = variant_list.get(1).unwrap();
         assert_eq!(elem1.as_boolean(), Some(false));
     }
+
+    #[test]
+    fn 
test_large_variant_list_with_total_child_length_between_2_pow_8_and_2_pow_16() {
+        // all the tests below will set the total child size to ~500,
+        // which is larger than 2^8 but less than 2^16.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 500;
+
+        // offset size will be OffsetSizeBytes::Two as the total child length is between 2^8 and 2^16
+        let expected_offset_size = OffsetSizeBytes::Two;
+
+        test_large_variant_list_with_child_length(
+            list_size,             // the elements in the list
+            single_child_item_len, // this will control the total child size 
in the list
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    #[test]
+    fn 
test_large_variant_list_with_total_child_length_between_2_pow_16_and_2_pow_24() 
{
+        // all the tests below will set the total child size to ~70,000,
+        // which is larger than 2^16 but less than 2^24.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 70000;
+
+        // offset size will be OffsetSizeBytes::Three as the total child length is between 2^16 and 2^24
+        let expected_offset_size = OffsetSizeBytes::Three;
+
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 275;
+        // total child size = 255 * 275 = 70,125
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 274;
+        // total child size = 256 * 274 = 70,144
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 234;
+        // total child size = 300 * 234 = 70,200
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    #[test]
+    fn 
test_large_variant_list_with_total_child_length_between_2_pow_24_and_2_pow_32() 
{
+        // all the tests below will set the total child size to ~20,000,000,
+        // which is larger than 2^24 but less than 2^32.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 20000000;
+
+        // offset size will be OffsetSizeBytes::Four as the total child length is between 2^24 and 2^32
+        let expected_offset_size = OffsetSizeBytes::Four;
+
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 78432;
+        // total child size = 255 * 78,432 = 20,000,160
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size 
of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 78125;
+        // total child size = 256 * 78,125 = 20,000,000
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 66667;
+        // total child size = 300 * 66,667 = 20,000,100
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the 
size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    // This function creates a large variant list via VariantBuilder with the
+    // specified number of elements, each child item having the given length,
+    // and then verifies the list header metadata and the list contents.
+    fn test_large_variant_list_with_child_length(
+        list_size: usize,
+        single_child_item_len: usize,
+        expected_num_element_size: OffsetSizeBytes,
+        expected_offset_size_bytes: OffsetSizeBytes,
+    ) {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+
+        let mut expected_list = vec![];
+        for i in 0..list_size {
+            let random_string: String =
+                repeat_n(char::from((i % 256) as u8), 
single_child_item_len).collect();
+
+            list_builder.append_value(Variant::String(random_string.as_str()));
+            expected_list.push(random_string);
+        }
+
+        list_builder.finish();
+        // Finish the builder to get the metadata and value
+        let (metadata, value) = builder.finish();
+        // use the Variant API to verify the result
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        let variant_list = variant.as_list().unwrap();
+
+        // verify that the header is as expected
+        assert_eq!(expected_offset_size_bytes, 
variant_list.header.offset_size);
+        assert_eq!(
+            expected_num_element_size,
+            variant_list.header.num_elements_size
+        );
+        assert_eq!(list_size, variant_list.num_elements);
+
+        // verify the data in the variant
+        assert_eq!(list_size, variant_list.len());
+        for i in 0..list_size {
+            let item = variant_list.get(i).unwrap();
+            let item_str = item.as_string().unwrap();
+            assert_eq!(expected_list.get(i).unwrap(), item_str);
+        }
+    }
 }

Reply via email to