codephage2020 commented on code in PR #9598:
URL: https://github.com/apache/arrow-rs/pull/9598#discussion_r3015277130


##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -4237,4 +4291,143 @@ mod test {
             );
         }
     }
+
+    /// Test extracting a struct with mixed typed and variant fields.
+    /// Fields with VariantType extension metadata should be extracted as 
VariantArrays.
+    #[test]
+    fn test_struct_extraction_with_variant_fields() {
+        // Create test data: [{"id": 1, "name": "Alice", "data": {"score": 
95}},
+        //                   {"id": 2, "name": "Bob", "data": null}]
+        let json_strings = vec![
+            r#"{"id": 1, "name": "Alice", "data": {"score": 95}}"#,
+            r#"{"id": 2, "name": "Bob", "data": null}"#,
+            r#"{"id": 3, "name": null, "data": {"level": 5}}"#,
+        ];
+        let string_array: Arc<dyn Array> = 
Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Request struct where:
+        // - "id" is extracted as Int32
+        // - "name" is extracted as String (Utf8)
+        // - "data" is extracted as Variant (using VariantType extension 
metadata)
+        let struct_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, true),
+            Field::new("name", DataType::Utf8, true),
+            // Use VariantType extension metadata to request extraction as 
VariantArray.
+            // The data type must be Struct to satisfy 
VariantType::supports_data_type.
+            Field::new("data", DataType::Struct(Fields::empty()), true)
+                .with_extension_type(VariantType),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        let result = variant_get(&variant_array_ref, options).unwrap();
+
+        // Verify the result is a StructArray with 3 fields
+        let struct_result = 
result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_result.len(), 3);
+        assert_eq!(struct_result.num_columns(), 3);
+
+        // Verify "id" field (Int32)
+        let id_field = struct_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id_field.value(0), 1);
+        assert_eq!(id_field.value(1), 2);
+        assert_eq!(id_field.value(2), 3);
+
+        // Verify "name" field (String/Utf8)
+        let name_field = struct_result
+            .column(1)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(name_field.value(0), "Alice");
+        assert_eq!(name_field.value(1), "Bob");
+        assert!(name_field.is_null(2)); // null name in row 2
+
+        // Verify "data" field schema has VariantType extension metadata
+        let data_schema_field = struct_result
+            .fields()
+            .iter()
+            .find(|f| f.name() == "data")
+            .unwrap();
+        assert!(
+            data_schema_field
+                .try_extension_type::<VariantType>()
+                .is_ok(),
+            "data field should have VariantType extension metadata"
+        );
+
+        // Verify "data" field (VariantArray)
+        let data_field = struct_result.column(2);
+        // The data field should be a StructArray representing VariantArray's 
internal structure
+        // It has columns: metadata, value (optional), typed_value (optional)
+        let data_as_struct = data_field.as_any().downcast_ref::<StructArray>();
+        assert!(
+            data_as_struct.is_some(),
+            "data field should be a VariantArray (represented as StructArray)"
+        );
+
+        // Verify we can access the variant values
+        let data_variant_array = VariantArray::try_new(data_field).unwrap();
+        assert_eq!(data_variant_array.len(), 3);
+
+        // Row 0: data = {"score": 95}
+        let data0 = data_variant_array.value(0);
+        assert!(matches!(data0, Variant::Object(_)));

Review Comment:
   Good idea



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to