This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d0ed407895 Move extension type construction logic out of Field (#9266)
d0ed407895 is described below

commit d0ed4078953d8921d9c7967cf0e6b5c0ba13311c
Author: Ryan Johnson <[email protected]>
AuthorDate: Tue Jan 27 14:49:35 2026 -0700

    Move extension type construction logic out of Field (#9266)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/8987
    
    # Rationale for this change
    
    The logic to instantiate an extension type does not really depend on
    `Field`, other than indirectly because that struct happens to contain
    all the necessary bits of information.
    
    As part of the work to make the JSON decoder support extension types, it
    was [observed](https://github.com/apache/arrow-rs/pull/9021#discussion_r2636492978)
    that a field is not always available (or at least, not desirable because
    it creates redundancy). This change addresses the concern by making it
    possible to work directly with extension types instead of being forced
    to route through a `Field` instance.
    
    # What changes are included in this PR?
    
    Factor out the body of `Field::try_extension_type` as a new associated
    function `ExtensionType::try_new_from_field_metadata` that takes a data
    type and a field metadata map and delegates to `ExtensionType::try_new`.
    `Field::try_extension_type` then simply calls that new method.
    
    # Are these changes tested?
    
    Code movement. Existing unit tests validate it.
    
    # Are there any user-facing changes?
    
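    New provided trait method `ExtensionType::try_new_from_field_metadata`.
    The error messages for a missing or mismatched extension type name no
    longer start with "Field".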
---
 arrow-schema/src/extension/canonical/bool8.rs      |  2 +-
 .../src/extension/canonical/fixed_shape_tensor.rs  |  2 +-
 arrow-schema/src/extension/canonical/json.rs       |  2 +-
 arrow-schema/src/extension/canonical/opaque.rs     |  2 +-
 .../extension/canonical/timestamp_with_offset.rs   |  2 +-
 arrow-schema/src/extension/canonical/uuid.rs       |  2 +-
 .../extension/canonical/variable_shape_tensor.rs   |  2 +-
 arrow-schema/src/extension/mod.rs                  | 43 ++++++++++++++++++++++
 arrow-schema/src/field.rs                          | 20 +---------
 parquet/src/arrow/schema/virtual_type.rs           |  4 +-
 10 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/arrow-schema/src/extension/canonical/bool8.rs 
b/arrow-schema/src/extension/canonical/bool8.rs
index 362a2cc018..c94c8217b8 100644
--- a/arrow-schema/src/extension/canonical/bool8.rs
+++ b/arrow-schema/src/extension/canonical/bool8.rs
@@ -96,7 +96,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = Field::new("", DataType::Int8, false).with_metadata(
             [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
diff --git a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs 
b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
index b6bd1c1223..5157eefe9e 100644
--- a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
+++ b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
@@ -471,7 +471,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field =
             Field::new_fixed_size_list("", Field::new("", DataType::Float32, 
false), 3, false)
diff --git a/arrow-schema/src/extension/canonical/json.rs 
b/arrow-schema/src/extension/canonical/json.rs
index 297a2d99aa..d2a54b9189 100644
--- a/arrow-schema/src/extension/canonical/json.rs
+++ b/arrow-schema/src/extension/canonical/json.rs
@@ -222,7 +222,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = Field::new("", DataType::Int8, false).with_metadata(
             [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "{}".to_owned())]
diff --git a/arrow-schema/src/extension/canonical/opaque.rs 
b/arrow-schema/src/extension/canonical/opaque.rs
index fceae8d371..acfc1331a6 100644
--- a/arrow-schema/src/extension/canonical/opaque.rs
+++ b/arrow-schema/src/extension/canonical/opaque.rs
@@ -285,7 +285,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = Field::new("", DataType::Null, false).with_metadata(
             [(
diff --git a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs 
b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs
index 643025919d..20df20bad9 100644
--- a/arrow-schema/src/extension/canonical/timestamp_with_offset.rs
+++ b/arrow-schema/src/extension/canonical/timestamp_with_offset.rs
@@ -300,7 +300,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = make_valid_field_primitive(TimeUnit::Second)
             .with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), 
"".to_owned())].into());
diff --git a/arrow-schema/src/extension/canonical/uuid.rs 
b/arrow-schema/src/extension/canonical/uuid.rs
index 09533564ed..3e897f4731 100644
--- a/arrow-schema/src/extension/canonical/uuid.rs
+++ b/arrow-schema/src/extension/canonical/uuid.rs
@@ -100,7 +100,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = Field::new("", DataType::FixedSizeBinary(16), false);
         field.extension_type::<Uuid>();
diff --git a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs 
b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
index b5403dcf68..fbc641f543 100644
--- a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
+++ b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
@@ -529,7 +529,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn missing_name() {
         let field = Field::new_struct(
             "",
diff --git a/arrow-schema/src/extension/mod.rs 
b/arrow-schema/src/extension/mod.rs
index cd17272e15..aed560029d 100644
--- a/arrow-schema/src/extension/mod.rs
+++ b/arrow-schema/src/extension/mod.rs
@@ -23,6 +23,7 @@ mod canonical;
 pub use canonical::*;
 
 use crate::{ArrowError, DataType};
+use std::collections::HashMap;
 
 /// The metadata key for the string name identifying an [`ExtensionType`].
 pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name";
@@ -255,4 +256,46 @@ pub trait ExtensionType: Sized {
     /// This should return an error if the given data type is not supported by
     /// this extension type.
     fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, 
ArrowError>;
+
+    /// Construct this extension type from field metadata and data type.
+    ///
+    /// This is a provided method that extracts extension type information from
+    /// metadata (using [`EXTENSION_TYPE_NAME_KEY`] and
+    /// [`EXTENSION_TYPE_METADATA_KEY`]) and delegates to [`Self::try_new`].
+    ///
+    /// Returns an error if:
+    /// - The extension type name is missing or doesn't match [`Self::NAME`]
+    /// - Metadata deserialization fails
+    /// - The data type is not supported
+    ///
+    /// This method enables extension type checking without requiring a full
+    /// [`Field`] instance, useful when only metadata and data type are 
available.
+    ///
+    /// [`Field`]: crate::Field
+    fn try_new_from_field_metadata(
+        data_type: &DataType,
+        metadata: &HashMap<String, String>,
+    ) -> Result<Self, ArrowError> {
+        // Check the extension name in the metadata
+        match metadata.get(EXTENSION_TYPE_NAME_KEY).map(|s| s.as_str()) {
+            // It should match the name of the given extension type
+            Some(name) if name == Self::NAME => {
+                // Deserialize the metadata and try to construct the extension 
type
+                let ext_metadata = metadata
+                    .get(EXTENSION_TYPE_METADATA_KEY)
+                    .map(|s| s.as_str());
+                let parsed = Self::deserialize_metadata(ext_metadata)?;
+                Self::try_new(data_type, parsed)
+            }
+            // Name mismatch
+            Some(name) => Err(ArrowError::InvalidArgumentError(format!(
+                "Extension type name mismatch: expected {}, got {name}",
+                Self::NAME
+            ))),
+            // Name missing
+            None => Err(ArrowError::InvalidArgumentError(
+                "Extension type name missing".to_string(),
+            )),
+        }
+    }
 }
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index c4566e41bf..a1c509abf2 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -575,25 +575,7 @@ impl Field {
     /// }
     /// ```
     pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, 
ArrowError> {
-        // Check the extension name in the metadata
-        match self.extension_type_name() {
-            // It should match the name of the given extension type
-            Some(name) if name == E::NAME => {
-                // Deserialize the metadata and try to construct the extension
-                // type
-                E::deserialize_metadata(self.extension_type_metadata())
-                    .and_then(|metadata| E::try_new(self.data_type(), 
metadata))
-            }
-            // Name mismatch
-            Some(name) => Err(ArrowError::InvalidArgumentError(format!(
-                "Field extension type name mismatch, expected {}, found 
{name}",
-                E::NAME
-            ))),
-            // Name missing
-            None => Err(ArrowError::InvalidArgumentError(
-                "Field extension type name missing".to_owned(),
-            )),
-        }
+        E::try_new_from_field_metadata(self.data_type(), self.metadata())
     }
 
     /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
diff --git a/parquet/src/arrow/schema/virtual_type.rs 
b/parquet/src/arrow/schema/virtual_type.rs
index b71753f61c..657a76b732 100644
--- a/parquet/src/arrow/schema/virtual_type.rs
+++ b/parquet/src/arrow/schema/virtual_type.rs
@@ -143,7 +143,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn row_number_missing_name() {
         let field = Field::new("", DataType::Int64, false).with_metadata(
             [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
@@ -203,7 +203,7 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Field extension type name missing")]
+    #[should_panic(expected = "Extension type name missing")]
     fn row_group_index_missing_name() {
         let field = Field::new("", DataType::Int64, false).with_metadata(
             [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]

Reply via email to