This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch 
dont-allow-duplicating-names-from-known-schemata
in repository https://gitbox.apache.org/repos/asf/avro-rs.git

commit 1abb1bc7e88a5bff8ff05c93bd611fb8fffe8520
Author: Martin Tzvetanov Grigorov <[email protected]>
AuthorDate: Tue Jan 27 15:18:52 2026 +0200

    fix: Don't allow resolving a schema that is already known
    
    The `known_schemata` NamesRef is used to resolve schema references, i.e.
    if a schema ref is not referring to a schema that has been seen/defined
    earlier in the same schema definition (JSON) then we consult a collection
    of known (previously parsed) schemas.
    
    If a newly parsed schema is defined earlier then an 
AmbiguousSchemaDefinition error is returned.
    The same should be done when a newly parsed schema clashes with a schema
    with the very same name in `known_schemata`
---
 avro/src/schema/resolve.rs | 133 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 37 deletions(-)

diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs
index e82b87d..7abd4f6 100644
--- a/avro/src/schema/resolve.rs
+++ b/avro/src/schema/resolve.rs
@@ -105,27 +105,30 @@ impl<'s> ResolvedSchema<'s> {
                 })
                 | Schema::Duration(FixedSchema { name, .. }) => {
                     let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
-                    if self
-                        .names_ref
-                        .insert(fully_qualified_name.clone(), schema)
-                        .is_some()
-                    {
+                    let is_duplicate = 
self.names_ref.contains_key(&fully_qualified_name)
+                        || known_schemata
+                            .as_ref()
+                            .map(|names| 
names.contains_key(&fully_qualified_name))
+                            .unwrap_or(false);
+                    if is_duplicate {
                         return 
Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into());
                     }
+                    self.names_ref.insert(fully_qualified_name.clone(), 
schema);
                 }
                 Schema::Record(RecordSchema { name, fields, .. }) => {
                     let fully_qualified_name = 
name.fully_qualified_name(enclosing_namespace);
-                    if self
-                        .names_ref
-                        .insert(fully_qualified_name.clone(), schema)
-                        .is_some()
-                    {
+                    let is_duplicate = 
self.names_ref.contains_key(&fully_qualified_name)
+                        || known_schemata
+                            .as_ref()
+                            .map(|names| 
names.contains_key(&fully_qualified_name))
+                            .unwrap_or(false);
+                    if is_duplicate {
                         return 
Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into());
-                    } else {
-                        let record_namespace = fully_qualified_name.namespace;
-                        for field in fields {
-                            self.resolve(vec![&field.schema], 
&record_namespace, known_schemata)?
-                        }
+                    }
+                    self.names_ref.insert(fully_qualified_name.clone(), 
schema);
+                    let record_namespace = fully_qualified_name.namespace;
+                    for field in fields {
+                        self.resolve(vec![&field.schema], &record_namespace, 
known_schemata)?
                     }
                 }
                 Schema::Ref { name } => {
@@ -266,9 +269,12 @@ pub fn resolve_names_with_schemata(
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+
+    use super::{ResolvedOwnedSchema, ResolvedSchema};
     use crate::Schema;
-    use crate::schema::Name;
-    use crate::schema::resolve::{ResolvedOwnedSchema, ResolvedSchema};
+    use crate::error::Details;
+    use crate::schema::{Name, NamesRef};
     use apache_avro_test_helper::TestResult;
 
     #[test]
@@ -303,7 +309,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_record_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -344,7 +350,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_record_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -380,7 +386,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_enum_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -416,7 +422,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_enum_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -452,7 +458,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_fixed_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -488,7 +494,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.inner_fixed_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -530,7 +536,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "inner_space.inner_record_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -567,7 +573,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "inner_space.inner_enum_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -604,7 +610,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "inner_space.inner_fixed_name"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -657,7 +663,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 3);
         for s in &[
             "space.record_name",
@@ -715,7 +721,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 3);
         for s in &[
             "space.record_name",
@@ -774,7 +780,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 3);
         for s in &[
             "space.record_name",
@@ -819,7 +825,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.in_array_record"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -860,7 +866,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
         assert_eq!(rs.get_names().len(), 2);
         for s in &["space.record_name", "space.in_map_record"] {
             assert!(rs.get_names().contains_key(&Name::new(s)?));
@@ -897,7 +903,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
 
         // confirm we have expected 2 full-names
         assert_eq!(rs.get_names().len(), 2);
@@ -906,8 +912,8 @@ mod tests {
         }
 
         // convert Schema back to JSON string
-        let schema_str = serde_json::to_string(&schema).expect("test failed");
-        let _schema = Schema::parse_str(&schema_str).expect("test failed");
+        let schema_str = serde_json::to_string(&schema)?;
+        let _schema = Schema::parse_str(&schema_str)?;
         assert_eq!(schema, _schema);
 
         Ok(())
@@ -941,7 +947,7 @@ mod tests {
         }
         "#;
         let schema = Schema::parse_str(schema)?;
-        let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't 
successfully parse");
+        let rs = ResolvedSchema::try_from(&schema)?;
 
         // confirm we have expected 2 full-names
         assert_eq!(rs.get_names().len(), 2);
@@ -950,8 +956,8 @@ mod tests {
         }
 
         // convert Schema back to JSON string
-        let schema_str = serde_json::to_string(&schema).expect("test failed");
-        let _schema = Schema::parse_str(&schema_str).expect("test failed");
+        let schema_str = serde_json::to_string(&schema)?;
+        let _schema = Schema::parse_str(&schema_str)?;
         assert_eq!(schema, _schema);
 
         Ok(())
@@ -1016,4 +1022,57 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn avro_rs_444_do_not_allow_duplicate_names_in_known_schemata() -> 
TestResult {
+        let schema = Schema::parse_str(
+            r#"{
+            "name": "foo",
+            "type": "record",
+            "fields": [
+                {
+                    "name": "a",
+                    "type": {
+                        "type": "fixed",
+                        "size": 16,
+                        "logicalType": "decimal",
+                        "precision": 4,
+                        "scale": 2,
+                        "name": "bar"
+                    }
+                },
+                {
+                    "name": "b",
+                    "type": "bar"
+                },
+                {
+                    "name": "c",
+                    "type": {
+                        "type": "fixed",
+                        "size": 16,
+                        "logicalType": "uuid",
+                        "name": "duplicated_name"
+                    }
+                }
+            ]
+        }"#,
+        )?;
+
+        let mut known_schemata: NamesRef = HashMap::default();
+        known_schemata.insert("duplicated_name".try_into()?, &Schema::Boolean);
+
+        let result = ResolvedSchema::new_with_known_schemata(vec![&schema], 
&None, &known_schemata);
+        match result {
+            Ok(_) => {
+                panic!("Expected a AmbiguousSchemaDefinition error for 
`duplicated_name`")
+            }
+            Err(err) => match err.into_details() {
+                Details::AmbiguousSchemaDefinition(name) => {
+                    assert_eq!(name, "duplicated_name".try_into()?);
+                }
+                details => panic!("Unexpected error: {details}"),
+            },
+        }
+        Ok(())
+    }
 }

Reply via email to