This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch dont-allow-duplicating-names-from-known-schemata in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 1abb1bc7e88a5bff8ff05c93bd611fb8fffe8520 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Tue Jan 27 15:18:52 2026 +0200 fix: Don't allow resolving a schema that is already known The `known_schemata` NamesRef is used to resolve schema references, i.e. if a schema ref is not referring to a schema that has been seen/defined earlier in the same schema definition (JSON) then we consult a collection of known (previously parsed) schemas. If a newly parsed schema is defined earlier then an AmbiguousSchemaDefinition error is returned. The same should be done when a newly parsed schema clashes with a schema with the very same name in `known_schemata` --- avro/src/schema/resolve.rs | 133 ++++++++++++++++++++++++++++++++------------- 1 file changed, 96 insertions(+), 37 deletions(-) diff --git a/avro/src/schema/resolve.rs b/avro/src/schema/resolve.rs index e82b87d..7abd4f6 100644 --- a/avro/src/schema/resolve.rs +++ b/avro/src/schema/resolve.rs @@ -105,27 +105,30 @@ impl<'s> ResolvedSchema<'s> { }) | Schema::Duration(FixedSchema { name, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if self - .names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { + let is_duplicate = self.names_ref.contains_key(&fully_qualified_name) + || known_schemata + .as_ref() + .map(|names| names.contains_key(&fully_qualified_name)) + .unwrap_or(false); + if is_duplicate { return Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into()); } + self.names_ref.insert(fully_qualified_name.clone(), schema); } Schema::Record(RecordSchema { name, fields, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if self - .names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { + let is_duplicate = self.names_ref.contains_key(&fully_qualified_name) + || known_schemata + .as_ref() + .map(|names| names.contains_key(&fully_qualified_name)) + .unwrap_or(false); + if is_duplicate { return Err(Details::AmbiguousSchemaDefinition(fully_qualified_name).into()); - } else { - let record_namespace = fully_qualified_name.namespace; - for field in fields { - self.resolve(vec![&field.schema], &record_namespace, known_schemata)? - } + } + self.names_ref.insert(fully_qualified_name.clone(), schema); + let record_namespace = fully_qualified_name.namespace; + for field in fields { + self.resolve(vec![&field.schema], &record_namespace, known_schemata)? } } Schema::Ref { name } => { @@ -266,9 +269,12 @@ pub fn resolve_names_with_schemata( #[cfg(test)] mod tests { + use std::collections::HashMap; + + use super::{ResolvedOwnedSchema, ResolvedSchema}; use crate::Schema; - use crate::schema::Name; - use crate::schema::resolve::{ResolvedOwnedSchema, ResolvedSchema}; + use crate::error::Details; + use crate::schema::{Name, NamesRef}; use apache_avro_test_helper::TestResult; #[test] @@ -303,7 +309,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -344,7 +350,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -380,7 +386,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -416,7 +422,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -452,7 +458,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -488,7 +494,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -530,7 +536,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_record_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -567,7 +573,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_enum_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -604,7 +610,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_fixed_name"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -657,7 +663,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 3); for s in &[ "space.record_name", @@ -715,7 +721,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 3); for s in &[ "space.record_name", @@ -774,7 +780,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 3); for s in &[ "space.record_name", @@ -819,7 +825,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_array_record"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -860,7 +866,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_map_record"] { assert!(rs.get_names().contains_key(&Name::new(s)?)); @@ -897,7 +903,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); @@ -906,8 +912,8 @@ mod tests { } // convert Schema back to JSON string - let schema_str = serde_json::to_string(&schema).expect("test failed"); - let _schema = Schema::parse_str(&schema_str).expect("test failed"); + let schema_str = serde_json::to_string(&schema)?; + let _schema = Schema::parse_str(&schema_str)?; assert_eq!(schema, _schema); Ok(()) @@ -941,7 +947,7 @@ mod tests { } "#; let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); + let rs = ResolvedSchema::try_from(&schema)?; // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); @@ -950,8 +956,8 @@ mod tests { } // convert Schema back to JSON string - let schema_str = serde_json::to_string(&schema).expect("test failed"); - let _schema = Schema::parse_str(&schema_str).expect("test failed"); + let schema_str = serde_json::to_string(&schema)?; + let _schema = Schema::parse_str(&schema_str)?; assert_eq!(schema, _schema); Ok(()) @@ -1016,4 +1022,57 @@ mod tests { Ok(()) } + + #[test] + fn avro_rs_444_do_not_allow_duplicate_names_in_known_schemata() -> TestResult { + let schema = Schema::parse_str( + r#"{ + "name": "foo", + "type": "record", + "fields": [ + { + "name": "a", + "type": { + "type": "fixed", + "size": 16, + "logicalType": "decimal", + "precision": 4, + "scale": 2, + "name": "bar" + } + }, + { + "name": "b", + "type": "bar" + }, + { + "name": "c", + "type": { + "type": "fixed", + "size": 16, + "logicalType": "uuid", + "name": "duplicated_name" + } + } + ] + }"#, + )?; + + let mut known_schemata: NamesRef = HashMap::default(); + known_schemata.insert("duplicated_name".try_into()?, &Schema::Boolean); + + let result = ResolvedSchema::new_with_known_schemata(vec![&schema], &None, &known_schemata); + match result { + Ok(_) => { + panic!("Expected a AmbiguousSchemaDefinition error for `duplicated_name`") + } + Err(err) => match err.into_details() { + Details::AmbiguousSchemaDefinition(name) => { + assert_eq!(name, "duplicated_name".try_into()?); + } + details => panic!("Unexpected error: {details}"), + }, + } + Ok(()) + } }
