This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d6a29ec103 Support reading/writing `VariantArray` to parquet with Variant LogicalType (#8408)
d6a29ec103 is described below
commit d6a29ec1033b133add2a94d5fb62bb450c9519ff
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Sep 26 09:20:54 2025 -0700
Support reading/writing `VariantArray` to parquet with Variant LogicalType (#8408)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8370
- Closes https://github.com/apache/arrow-rs/pull/8365
# Rationale for this change
Parquet has logical types, which are how other writers signal which columns
contain `Variant` values.
# What changes are included in this PR?
1. Add mapping between the Parquet LogicalType and the Arrow ExtensionType added in https://github.com/apache/arrow-rs/pull/8392 (see the schema sketch after this list)
2. Documentation and tests showing how to read/write Parquet files with the Variant logical type annotation
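
To make the mapping concrete, here is a minimal sketch (not part of this change) of what it looks like from the reader side: a column written with the Parquet Variant logical type comes back as an Arrow field tagged with the `arrow.parquet.variant` extension type. It assumes the `variant_experimental` feature and a hypothetical `variant.parquet` file containing a Variant column named `data`.

```rust
use arrow_array::RecordBatchReader;
use parquet::arrow::arrow_reader::ArrowReaderBuilder;
use parquet::variant::VariantType;

fn main() -> Result<(), parquet::errors::ParquetError> {
    // Open the (hypothetical) file with the standard Arrow Parquet reader
    let file = std::fs::File::open("variant.parquet")?;
    let reader = ArrowReaderBuilder::try_new(file)?.build()?;

    // The converted Arrow schema carries the extension name as field metadata
    let schema = reader.schema();
    let field = schema.field_with_name("data")?;
    let extension_name = field
        .metadata()
        .get("ARROW:extension:name")
        .expect("Variant columns should carry extension metadata");
    assert_eq!(extension_name, "arrow.parquet.variant");

    // Equivalently, check via the typed extension accessor
    assert!(field.try_extension_type::<VariantType>().is_ok());
    Ok(())
}
```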
# Are these changes tested?
Yes, new unit tests and doc examples
# Are there any user-facing changes?
You can now read Parquet Variant columns into `VariantArray` and write `VariantArray` columns back to Parquet (see the sketch below)
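
A minimal sketch of that round trip, adapted from the doc examples added in this PR (the `variant.parquet` path and the `data` column name are illustrative; requires the `variant_experimental` feature):

```rust
use std::sync::Arc;
use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::Schema;
use parquet::arrow::arrow_reader::ArrowReaderBuilder;
use parquet::arrow::ArrowWriter;
use parquet::variant::{Variant, VariantArray, VariantArrayBuilder, VariantBuilderExt};

fn main() -> Result<(), parquet::errors::ParquetError> {
    // Build a VariantArray: row 1 is an object, row 2 a string
    let mut builder = VariantArrayBuilder::new(2);
    builder.new_object().with_field("name", "Alice").finish();
    builder.append_value("such wow");
    let array = builder.build();

    // VariantArray is an extension type: use its Field (which carries the
    // extension metadata) and its underlying ArrayRef to build a RecordBatch
    let field = array.field("data");
    let schema = Arc::new(Schema::new(vec![field]));
    let batch = RecordBatch::try_new(schema, vec![ArrayRef::from(array)])?;

    // Write as usual; the column is annotated with the Variant logical type
    let file = std::fs::File::create("variant.parquet")?;
    let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?;

    // Read it back and convert the struct column into a VariantArray
    let file = std::fs::File::open("variant.parquet")?;
    let mut reader = ArrowReaderBuilder::try_new(file)?.build()?;
    let batch = reader.next().unwrap()?;
    let variants = VariantArray::try_new(batch.column_by_name("data").unwrap())?;
    assert_eq!(variants.value(1), Variant::from("such wow"));
    Ok(())
}
```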
---------
Co-authored-by: Matthijs Brobbel <[email protected]>
---
parquet-variant-compute/Cargo.toml | 2 +-
parquet/Cargo.toml | 2 +-
parquet/src/arrow/schema/complex.rs | 29 ++--
parquet/src/arrow/schema/extension.rs | 72 +++++++++
parquet/src/arrow/schema/mod.rs | 12 +-
parquet/src/variant.rs | 269 +++++++++++++++++++++++++++++-----
6 files changed, 330 insertions(+), 56 deletions(-)
diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml
index 64ab195a52..828ad77bd6 100644
--- a/parquet-variant-compute/Cargo.toml
+++ b/parquet-variant-compute/Cargo.toml
@@ -31,7 +31,7 @@ rust-version = { workspace = true }
[dependencies]
-arrow = { workspace = true }
+arrow = { workspace = true , features = ["canonical_extension_types"]}
arrow-schema = { workspace = true }
half = { version = "2.1", default-features = false }
indexmap = "2.10.0"
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index f57a7627a5..06e6aac2e3 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -130,7 +130,7 @@ encryption = ["dep:ring"]
flate2-rust_backened = ["flate2/rust_backend"]
flate2-zlib-rs = ["flate2/zlib-rs"]
# Enable parquet variant support
-variant_experimental = ["parquet-variant", "parquet-variant-json", "parquet-variant-compute"]
+variant_experimental = ["arrow", "parquet-variant", "parquet-variant-json", "parquet-variant-compute"]
[[example]]
diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs
index 16d46bd852..ecc80a6590 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -18,6 +18,7 @@
use std::collections::HashMap;
use std::sync::Arc;
+use crate::arrow::schema::extension::add_extension_type;
use crate::arrow::schema::primitive::convert_primitive;
use crate::arrow::{ProjectionMask, PARQUET_FIELD_ID_META_KEY};
use crate::basic::{ConvertedType, Repetition};
@@ -172,7 +173,7 @@ impl Visitor {
let parquet_fields = struct_type.get_fields();
- // Extract the arrow fields
+ // Extract any arrow fields from the hints
let arrow_fields = match &context.data_type {
Some(DataType::Struct(fields)) => {
if fields.len() != parquet_fields.len() {
@@ -220,10 +221,10 @@ impl Visitor {
data_type,
};
- if let Some(child) = self.dispatch(parquet_field, child_ctx)? {
+ if let Some(mut child) = self.dispatch(parquet_field, child_ctx)? {
// The child type returned may be different from what is encoded in the arrow
// schema in the event of a mismatch or a projection
- child_fields.push(convert_field(parquet_field, &child, arrow_field));
+ child_fields.push(convert_field(parquet_field, &mut child, arrow_field));
children.push(child);
}
}
@@ -352,13 +353,13 @@ impl Visitor {
// Need both columns to be projected
match (maybe_key, maybe_value) {
- (Some(key), Some(value)) => {
+ (Some(mut key), Some(mut value)) => {
let key_field = Arc::new(
- convert_field(map_key, &key, arrow_key)
+ convert_field(map_key, &mut key, arrow_key)
// The key is always non-nullable (#5630)
.with_nullable(false),
);
- let value_field = Arc::new(convert_field(map_value, &value, arrow_value));
+ let value_field = Arc::new(convert_field(map_value, &mut value, arrow_value));
let field_metadata = match arrow_map {
Some(field) => field.metadata().clone(),
_ => HashMap::default(),
@@ -495,8 +496,8 @@ impl Visitor {
};
match self.dispatch(item_type, new_context) {
- Ok(Some(item)) => {
- let item_field = Arc::new(convert_field(item_type, &item, arrow_field));
+ Ok(Some(mut item)) => {
+ let item_field = Arc::new(convert_field(item_type, &mut item, arrow_field));
// Use arrow type as hint for index size
let arrow_type = match context.data_type {
@@ -540,11 +541,15 @@ impl Visitor {
}
}
-/// Computes the [`Field`] for a child column
+/// Computes the Arrow [`Field`] for a child column
///
-/// The resulting [`Field`] will have the type dictated by `field`, a name
+/// The resulting Arrow [`Field`] will have the type dictated by the Parquet `field`, a name
/// dictated by the `parquet_type`, and any metadata from `arrow_hint`
-fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&Field>) -> Field {
+fn convert_field(
+ parquet_type: &Type,
+ field: &mut ParquetField,
+ arrow_hint: Option<&Field>,
+) -> Field {
let name = parquet_type.name();
let data_type = field.arrow_type.clone();
let nullable = field.nullable;
@@ -575,7 +580,7 @@ fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&
);
ret.set_metadata(meta);
}
- ret
+ add_extension_type(ret, parquet_type)
}
}
}
diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs
new file mode 100644
index 0000000000..752b9a5ced
--- /dev/null
+++ b/parquet/src/arrow/schema/extension.rs
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Arrow Extension Type Support for Parquet
+//!
+//! This module contains mapping code to map Parquet [`LogicalType`]s to/from
+//! Arrow [`ExtensionType`]s.
+//!
+//! Extension types are represented using the metadata from Arrow [`Field`]s
+//! with the key "ARROW:extension:name".
+
+use crate::basic::LogicalType;
+use crate::schema::types::Type;
+use arrow_schema::extension::ExtensionType;
+use arrow_schema::Field;
+
+/// Adds extension type metadata, if necessary, based on the Parquet field's
+/// [`LogicalType`]
+///
+/// Some Parquet logical types, such as Variant, do not map directly to an
+/// Arrow DataType, and instead are represented by an Arrow ExtensionType.
+/// Extension types are attached to Arrow Fields via metadata.
+pub(crate) fn add_extension_type(mut arrow_field: Field, parquet_type: &Type) -> Field {
+ match parquet_type.get_basic_info().logical_type() {
+ #[cfg(feature = "variant_experimental")]
+ Some(LogicalType::Variant) => {
+ // try to add the Variant extension type, but if that fails (e.g. because the
+ // storage type is not supported), just return the field as is
+ arrow_field
+ .try_with_extension_type(parquet_variant_compute::VariantType)
+ .ok();
+ arrow_field
+ }
+ // TODO add other LogicalTypes here
+ _ => arrow_field,
+ }
+}
+
+/// Return the Parquet logical type to use for the specified Arrow field, if any.
+#[cfg(feature = "variant_experimental")]
+pub(crate) fn logical_type_for_struct(field: &Field) -> Option<LogicalType> {
+ use parquet_variant_compute::VariantType;
+ // Check the name (= quick and cheap) and only try_extension_type if the name matches
+ // to avoid unnecessary String allocations in ArrowError
+ if field.extension_type_name()? != VariantType::NAME {
+ return None;
+ }
+ match field.try_extension_type::<VariantType>() {
+ Ok(VariantType) => Some(LogicalType::Variant),
+ // Given check above, this should not error, but if it does ignore
+ Err(_e) => None,
+ }
+}
+
+#[cfg(not(feature = "variant_experimental"))]
+pub(crate) fn logical_type_for_struct(field: &Field) -> Option<LogicalType> {
+ None
+}
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 5b079b6627..9d1098d86c 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -35,13 +35,14 @@ use crate::file::{metadata::KeyValue, properties::WriterProperties};
use crate::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
mod complex;
+mod extension;
mod primitive;
+use super::PARQUET_FIELD_ID_META_KEY;
+use crate::arrow::schema::extension::logical_type_for_struct;
use crate::arrow::ProjectionMask;
pub(crate) use complex::{ParquetField, ParquetFieldType};
-use super::PARQUET_FIELD_ID_META_KEY;
-
/// Convert Parquet schema to Arrow schema including optional metadata
///
/// Attempts to decode any existing Arrow schema metadata, falling back
@@ -63,7 +64,11 @@ pub fn parquet_to_arrow_schema_by_columns(
Ok(parquet_to_arrow_schema_and_fields(parquet_schema, mask, key_value_metadata)?.0)
}
-/// Extracts the arrow metadata
+/// Determines the Arrow Schema from a Parquet schema
+///
+/// Looks for an Arrow schema metadata "hint" (see
+/// [`parquet_to_arrow_field_levels`]), and uses it if present to ensure
+/// lossless round trips.
pub(crate) fn parquet_to_arrow_schema_and_fields(
parquet_schema: &SchemaDescriptor,
mask: ProjectionMask,
@@ -728,6 +733,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
.with_fields(fields)
.with_repetition(repetition)
.with_id(id)
+ .with_logical_type(logical_type_for_struct(field))
.build()
}
DataType::Map(field, _) => {
diff --git a/parquet/src/variant.rs b/parquet/src/variant.rs
index b5902c02ed..497d1dc6c4 100644
--- a/parquet/src/variant.rs
+++ b/parquet/src/variant.rs
@@ -25,38 +25,36 @@
//! * [`Variant`] represents variant value, which can be an object, list, or primitive.
//! * [`VariantBuilder`] for building `Variant` values.
//! * [`VariantArray`] for representing a column of Variant values.
-//! * [`compute`] module with functions for manipulating Variants, such as
+//! * [`json_to_variant`] and [`variant_to_json`] for converting to/from JSON.
+//! * [`cast_to_variant()`] for casting other Arrow arrays to `VariantArray`.
+//! * [`VariantType`] Arrow ExtensionType for Parquet Variant logical type.
//! [`variant_get`] to extracting a value by path and functions to convert
//! between `Variant` and JSON.
//!
-//! [Variant Logical Type]: Variant
-//! [`VariantArray`]: compute::VariantArray
-//! [`variant_get`]: compute::variant_get
-//!
//! # Example: Writing a Parquet file with Variant column
//! ```rust
-//! # use parquet::variant::compute::{VariantArray, VariantArrayBuilder};
-//! # use parquet::variant::VariantBuilderExt;
+//! # use parquet::variant::{VariantArray, VariantType, VariantArrayBuilder, VariantBuilderExt};
//! # use std::sync::Arc;
-//! # use arrow_array::{ArrayRef, RecordBatch};
+//! # use arrow_array::{Array, ArrayRef, RecordBatch};
+//! # use arrow_schema::{DataType, Field, Schema};
//! # use parquet::arrow::ArrowWriter;
//! # fn main() -> Result<(), parquet::errors::ParquetError> {
//! // Use the VariantArrayBuilder to build a VariantArray
//! let mut builder = VariantArrayBuilder::new(3);
-//! // row 1: {"name": "Alice"}
-//! builder.new_object().with_field("name", "Alice").finish();
+//! builder.new_object().with_field("name", "Alice").finish(); // row 1: {"name": "Alice"}
+//! builder.append_value("such wow"); // row 2: "such wow" (a string)
//! let array = builder.build();
//!
-//! // TODO support writing VariantArray directly
-//! // at the moment it panics when trying to downcast to a struct array
-//! // https://github.com/apache/arrow-rs/issues/8296
-//! // let array: ArrayRef = Arc::new(array);
-//! let array: ArrayRef = Arc::new(array.into_inner());
-//!
+//! // Since VariantArray is an ExtensionType, it needs to be converted
+//! // to an ArrayRef and Field with the appropriate metadata
+//! // before it can be written to a Parquet file
+//! let field = array.field("data");
+//! let array = ArrayRef::from(array);
//! // create a RecordBatch with the VariantArray
-//! let batch = RecordBatch::try_from_iter(vec![("data", array)])?;
+//! let schema = Schema::new(vec![field]);
+//! let batch = RecordBatch::try_new(Arc::new(schema), vec![array])?;
//!
-//! // write the RecordBatch to a Parquet file
+//! // Now you can write the RecordBatch to the Parquet file, as normal
//! let file = std::fs::File::create("variant.parquet")?;
//! let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
//! writer.write(&batch)?;
@@ -67,37 +65,29 @@
//! # }
//! ```
//!
-//! # Example: Writing JSON with a Parquet file with Variant column
+//! # Example: Writing JSON into a Parquet file with Variant column
//! ```rust
//! # use std::sync::Arc;
//! # use arrow_array::{ArrayRef, RecordBatch, StringArray};
-//! # use parquet::variant::compute::json_to_variant;
-//! # use parquet::variant::compute::VariantArray;
+//! # use arrow_schema::Schema;
+//! # use parquet::variant::{json_to_variant, VariantArray};
//! # use parquet::arrow::ArrowWriter;
//! # fn main() -> Result<(), parquet::errors::ParquetError> {
-//! // Create an array of JSON strings, simulating a column of JSON data
-//! // TODO use StringViewArray when available
-//! let input_array = StringArray::from(vec![
+//! // Create an array of JSON strings, simulating a column of JSON data
+//! let input_array: ArrayRef = Arc::new(StringArray::from(vec![
//! Some(r#"{"name": "Alice", "age": 30}"#),
//! Some(r#"{"name": "Bob", "age": 25, "address": {"city": "New York"}}"#),
//! None,
//! Some("{}"),
-//! ]);
-//! let input_array: ArrayRef = Arc::new(input_array);
-//!
-//! // Convert the JSON strings to a VariantArray
-//! let array: VariantArray = json_to_variant(&input_array)?;
-//!
-//! // TODO support writing VariantArray directly
-//! // at the moment it panics when trying to downcast to a struct array
-//! // https://github.com/apache/arrow-rs/issues/8296
-//! // let array: ArrayRef = Arc::new(array);
-//! let array: ArrayRef = Arc::new(array.into_inner());
+//! ]));
//!
+//! // Convert the JSON strings to a VariantArray
+//! let array: VariantArray = json_to_variant(&input_array)?;
//! // create a RecordBatch with the VariantArray
-//! let batch = RecordBatch::try_from_iter(vec![("data", array)])?;
+//! let schema = Schema::new(vec![array.field("data")]);
+//! let batch = RecordBatch::try_new(Arc::new(schema), vec![ArrayRef::from(array)])?;
//!
-//! // write the RecordBatch to a Parquet file
+//! // write the RecordBatch to a Parquet file as normal
//! let file = std::fs::File::create("variant-json.parquet")?;
//! let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
//! writer.write(&batch)?;
@@ -108,6 +98,207 @@
//! ```
//!
//! # Example: Reading a Parquet file with Variant column
-//! (TODO: add example)
+//!
+//! Use the [`VariantType`] extension type to find the Variant column:
+//!
+//! ```
+//! # use std::sync::Arc;
+//! # use std::path::PathBuf;
+//! # use arrow_array::{ArrayRef, RecordBatch, RecordBatchReader};
+//! # use parquet::variant::{Variant, VariantArray, VariantType};
+//! # use parquet::arrow::arrow_reader::ArrowReaderBuilder;
+//! # fn main() -> Result<(), parquet::errors::ParquetError> {
+//! # use arrow_array::StructArray;
+//! # fn file_path() -> PathBuf { // return a testing file path
+//! # PathBuf::from(arrow::util::test_util::parquet_test_data())
+//! # .join("..")
+//! # .join("shredded_variant")
+//! # .join("case-075.parquet")
+//! # }
+//! // Read the Parquet file using standard Arrow Parquet reader.
+//! // Note this file has 2 columns: "id", "var", and the "var" column
+// // contains a variant that looks like this:
+// // "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=STRING, value=iceberg))"
+//! let file = std::fs::File::open(file_path())?;
+//! let mut reader = ArrowReaderBuilder::try_new(file)?.build()?;
+//!
+//! // You can check if a column contains a Variant using
+//! // the VariantType extension type
+//! let schema = reader.schema();
+//! let field = schema.field_with_name("var")?;
+//! assert!(field.try_extension_type::<VariantType>().is_ok());
+//!
+//! // The reader will yield RecordBatches with a StructArray
+//! // to convert them to VariantArray, use VariantArray::try_new
+//! let batch = reader.next().unwrap().unwrap();
+//!
+//! let col = batch.column_by_name("var").unwrap();
+//! let var_array = VariantArray::try_new(col)?;
+//! assert_eq!(var_array.len(), 1);
+//! let var_value: Variant = var_array.value(0);
+//! assert_eq!(var_value, Variant::from("iceberg")); // the value in case-075.parquet
+//! # Ok(())
+//! # }
+//! ```
pub use parquet_variant::*;
-pub use parquet_variant_compute as compute;
+pub use parquet_variant_compute::*;
+
+#[cfg(test)]
+mod tests {
+ use crate::arrow::arrow_reader::ArrowReaderBuilder;
+ use crate::arrow::ArrowWriter;
+ use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+ use crate::file::reader::ChunkReader;
+ use arrow::util::test_util::parquet_test_data;
+ use arrow_array::{ArrayRef, RecordBatch};
+ use arrow_schema::Schema;
+ use bytes::Bytes;
+ use parquet_variant::{Variant, VariantBuilderExt};
+ use parquet_variant_compute::{VariantArray, VariantArrayBuilder, VariantType};
+ use std::path::PathBuf;
+ use std::sync::Arc;
+
+ #[test]
+ fn roundtrip_basic() {
+ roundtrip(variant_array());
+ }
+
+ /// Ensure a file with Variant LogicalType, written by another writer in
+ /// parquet-testing, can be read as a VariantArray
+ #[test]
+ fn read_logical_type() {
+ // Note: case-075 2 columns ("id", "var")
+ // The variant looks like this:
+ // "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=STRING, value=iceberg))"
+ let batch = read_shredded_variant_test_case("case-075.parquet");
+
+ assert_variant_metadata(&batch, "var");
+ let var_column = batch.column_by_name("var").expect("expected var column");
+ let var_array =
+ VariantArray::try_new(&var_column).expect("expected var column to be a VariantArray");
+
+ // verify the value
+ assert_eq!(var_array.len(), 1);
+ assert!(var_array.is_valid(0));
+ let var_value = var_array.value(0);
+ assert_eq!(var_value, Variant::from("iceberg"));
+ }
+
+ /// Writes a variant to a parquet file and ensures the parquet logical type
+ /// annotation is correct
+ #[test]
+ fn write_logical_type() {
+ let array = variant_array();
+ let batch = variant_array_to_batch(array);
+ let buffer = write_to_buffer(&batch);
+
+ // read the parquet file's metadata and verify the logical type
+ let metadata = read_metadata(&Bytes::from(buffer));
+ let schema = metadata.file_metadata().schema_descr();
+ let fields = schema.root_schema().get_fields();
+ assert_eq!(fields.len(), 1);
+ let field = &fields[0];
+ assert_eq!(field.name(), "data");
+ // data should have been written with the Variant logical type
+ assert_eq!(
+ field.get_basic_info().logical_type(),
+ Some(crate::basic::LogicalType::Variant)
+ );
+ }
+
+ /// Return a VariantArray with 3 rows:
+ ///
+ /// 1. `{"name": "Alice"}`
+ /// 2. `"such wow"` (a string)
+ /// 3. `null`
+ fn variant_array() -> VariantArray {
+ let mut builder = VariantArrayBuilder::new(3);
+ // row 1: {"name": "Alice"}
+ builder.new_object().with_field("name", "Alice").finish();
+ // row 2: "such wow" (a string)
+ builder.append_value("such wow");
+ // row 3: null
+ builder.append_null();
+ builder.build()
+ }
+
+ /// Writes a VariantArray to a parquet file and reads it back, verifying that
+ /// the data is the same
+ fn roundtrip(array: VariantArray) {
+ let source_batch = variant_array_to_batch(array);
+ assert_variant_metadata(&source_batch, "data");
+
+ let buffer = write_to_buffer(&source_batch);
+ let result_batch = read_to_batch(Bytes::from(buffer));
+ assert_variant_metadata(&result_batch, "data");
+ assert_eq!(result_batch, source_batch); // NB this also checks the schemas
+ }
+
+ /// creates a RecordBatch with a single column "data" from a VariantArray,
+ fn variant_array_to_batch(array: VariantArray) -> RecordBatch {
+ let field = array.field("data");
+ let schema = Schema::new(vec![field]);
+ RecordBatch::try_new(Arc::new(schema), vec![ArrayRef::from(array)]).unwrap()
+ }
+
+ /// writes a RecordBatch to memory buffer and returns the buffer
+ fn write_to_buffer(batch: &RecordBatch) -> Vec<u8> {
+ let mut buffer = vec![];
+ let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+ writer.write(batch).unwrap();
+ writer.close().unwrap();
+ buffer
+ }
+
+ /// Reads the Parquet metadata
+ fn read_metadata<T: ChunkReader + 'static>(input: &T) -> ParquetMetaData {
+ let mut reader = ParquetMetaDataReader::new();
+ reader.try_parse(input).unwrap();
+ reader.finish().unwrap()
+ }
+
+ /// Reads a RecordBatch from a reader (e.g. Vec or File)
+ fn read_to_batch<T: ChunkReader + 'static>(reader: T) -> RecordBatch {
+ let reader = ArrowReaderBuilder::try_new(reader)
+ .unwrap()
+ .build()
+ .unwrap();
+ let mut batches: Vec<RecordBatch> = reader.collect::<Result<Vec<_>, _>>().unwrap();
+ assert_eq!(batches.len(), 1);
+ batches.swap_remove(0)
+ }
+
+ /// Verifies the variant metadata is present in the schema for the specified
+ /// field name.
+ fn assert_variant_metadata(batch: &RecordBatch, field_name: &str) {
+ let schema = batch.schema();
+ let field = schema
+ .field_with_name(field_name)
+ .expect("could not find expected field");
+
+ // explicitly check the metadata so it is clear in the tests what the
+ // names are
+ let metadata_value = field
+ .metadata()
+ .get("ARROW:extension:name")
+ .expect("metadata does not exist");
+
+ assert_eq!(metadata_value, "arrow.parquet.variant");
+
+ // verify that `VariantType` also correctly finds the metadata
+ field
+ .try_extension_type::<VariantType>()
+ .expect("VariantExtensionType should be readable");
+ }
+
+ /// Read the specified test case filename from parquet-testing
+ /// See parquet-testing/shredded_variant/cases.json for more details
+ fn read_shredded_variant_test_case(name: &str) -> RecordBatch {
+ let case_file = PathBuf::from(parquet_test_data())
+ .join("..") // go up from data/ to parquet-testing/
+ .join("shredded_variant")
+ .join(name);
+ let case_file = std::fs::File::open(case_file).unwrap();
+ read_to_batch(case_file)
+ }
+}