This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6484f21  fix: AVRO-4055: Validate records properly when parsing a 
schema (#9)
6484f21 is described below

commit 6484f21bf938f9e7dd528f69329d85c82995b36a
Author: Santiago Fraire Willemoes <[email protected]>
AuthorDate: Fri Oct 4 14:39:29 2024 +0200

    fix: AVRO-4055: Validate records properly when parsing a schema (#9)
    
    fix:  AVRO-4055: Validate records properly when parsing a schema
    
    * Issue #9 - Rename ParseLocation to RecordSchemaParseLocation
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #9 - Check for other common/complex names (fixed & enum)
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    ---------
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
---
 avro/src/error.rs  |   3 ++
 avro/src/schema.rs | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 avro/src/types.rs  |  36 ++++++++++---------
 3 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/avro/src/error.rs b/avro/src/error.rs
index 09d458c..90f2522 100644
--- a/avro/src/error.rs
+++ b/avro/src/error.rs
@@ -350,6 +350,9 @@ pub enum Error {
     #[error("Invalid namespace {0}. It must match the regex '{1}'")]
     InvalidNamespace(String, &'static str),
 
+    #[error("Invalid schema: There is no type called '{0}', if you meant to 
define a non-primitive schema, it should be defined inside `type` attribute. 
Please review the specification")]
+    InvalidSchemaRecord(String),
+
     #[error("Duplicate enum symbol {0}")]
     EnumSymbolDuplicate(String),
 
diff --git a/avro/src/schema.rs b/avro/src/schema.rs
index 5a299d1..177cc3e 100644
--- a/avro/src/schema.rs
+++ b/avro/src/schema.rs
@@ -669,7 +669,11 @@ impl RecordField {
         validate_record_field_name(&name)?;
 
         // TODO: "type" = "<record name>"
-        let schema = parser.parse_complex(field, &enclosing_record.namespace)?;
+        let schema = parser.parse_complex(
+            field,
+            &enclosing_record.namespace,
+            RecordSchemaParseLocation::FromField,
+        )?;
 
         let default = field.get("default").cloned();
         Self::resolve_default_value(
@@ -1006,6 +1010,16 @@ fn parse_json_integer_for_decimal(value: 
&serde_json::Number) -> Result<DecimalM
     })
 }
 
+#[derive(Debug, Default)]
+enum RecordSchemaParseLocation {
+    /// When the parse is happening at root level
+    #[default]
+    Root,
+
+    /// When the parse is happening inside a record field
+    FromField,
+}
+
 #[derive(Default)]
 struct Parser {
     input_schemas: HashMap<Name, Value>,
@@ -1232,7 +1246,9 @@ impl Parser {
     fn parse(&mut self, value: &Value, enclosing_namespace: &Namespace) -> 
AvroResult<Schema> {
         match *value {
             Value::String(ref t) => self.parse_known_schema(t.as_str(), 
enclosing_namespace),
-            Value::Object(ref data) => self.parse_complex(data, 
enclosing_namespace),
+            Value::Object(ref data) => {
+                self.parse_complex(data, enclosing_namespace, 
RecordSchemaParseLocation::Root)
+            }
             Value::Array(ref data) => self.parse_union(data, 
enclosing_namespace),
             _ => Err(Error::ParseSchemaFromValidJson),
         }
@@ -1294,6 +1310,14 @@ impl Parser {
             return Ok(resolving_schema.clone());
         }
 
+        // For good error reporting we add this check
+        match name.name.as_str() {
+            "record" | "enum" | "fixed" => {
+                return Err(Error::InvalidSchemaRecord(name.to_string()));
+            }
+            _ => (),
+        }
+
         let value = self
             .input_schemas
             .remove(&fully_qualified_name)
@@ -1353,6 +1377,7 @@ impl Parser {
         &mut self,
         complex: &Map<String, Value>,
         enclosing_namespace: &Namespace,
+        parse_location: RecordSchemaParseLocation,
     ) -> AvroResult<Schema> {
         // Try to parse this as a native complex type.
         fn parse_as_native_complex(
@@ -1542,14 +1567,23 @@ impl Parser {
         }
         match complex.get("type") {
             Some(Value::String(t)) => match t.as_str() {
-                "record" => self.parse_record(complex, enclosing_namespace),
+                "record" => match parse_location {
+                    RecordSchemaParseLocation::Root => {
+                        self.parse_record(complex, enclosing_namespace)
+                    }
+                    RecordSchemaParseLocation::FromField => {
+                        self.fetch_schema_ref(t, enclosing_namespace)
+                    }
+                },
                 "enum" => self.parse_enum(complex, enclosing_namespace),
                 "array" => self.parse_array(complex, enclosing_namespace),
                 "map" => self.parse_map(complex, enclosing_namespace),
                 "fixed" => self.parse_fixed(complex, enclosing_namespace),
                 other => self.parse_known_schema(other, enclosing_namespace),
             },
-            Some(Value::Object(data)) => self.parse_complex(data, 
enclosing_namespace),
+            Some(Value::Object(data)) => {
+                self.parse_complex(data, enclosing_namespace, 
RecordSchemaParseLocation::Root)
+            }
             Some(Value::Array(variants)) => self.parse_union(variants, 
enclosing_namespace),
             Some(unknown) => Err(Error::GetComplexType(unknown.clone())),
             None => Err(Error::GetComplexTypeField),
@@ -4932,7 +4966,7 @@ mod tests {
                     "type": "Bar"
                 }
             ]
-        } 
+        }
         "#;
 
         #[derive(
@@ -6821,4 +6855,62 @@ mod tests {
         assert_eq!("92f2ccef718c6754", fp_rabin.to_string());
         Ok(())
     }
+
+    #[test]
+    fn avro_4055_should_fail_to_parse_invalid_schema() -> TestResult {
+        // This is invalid because the record type should be inside the type 
field.
+        let invalid_schema_str = r#"
+        {
+        "type": "record",
+        "name": "SampleSchema",
+        "fields": [
+            {
+            "name": "order",
+            "type": "record",
+            "fields": [
+                {
+                "name": "order_number",
+                "type": ["null", "string"],
+                "default": null
+                },
+                { "name": "order_date", "type": "string" }
+            ]
+            }
+        ]
+        }"#;
+
+        let schema = Schema::parse_str(invalid_schema_str);
+        assert!(schema.is_err());
+        assert_eq!(
+            schema.unwrap_err().to_string(),
+            "Invalid schema: There is no type called 'record', if you meant to 
define a non-primitive schema, it should be defined inside `type` attribute. 
Please review the specification"
+        );
+
+        let valid_schema = r#"
+        {
+            "type": "record",
+            "name": "SampleSchema",
+            "fields": [
+                {
+                "name": "order",
+                "type": {
+                    "type": "record",
+                    "name": "Order",
+                    "fields": [
+                    {
+                        "name": "order_number",
+                        "type": ["null", "string"],
+                        "default": null
+                    },
+                    { "name": "order_date", "type": "string" }
+                    ]
+                }
+                }
+            ]
+        }"#;
+        let schema = Schema::parse_str(valid_schema);
+        assert!(schema.is_ok());
+
+        Ok(())
+    }
 }
diff --git a/avro/src/types.rs b/avro/src/types.rs
index 0fb43f2..b4d61a9 100644
--- a/avro/src/types.rs
+++ b/avro/src/types.rs
@@ -2771,35 +2771,37 @@ Field with name '"b"' is not a member of the map 
items"#,
         use crate::ser::Serializer;
         use serde::Serialize;
 
-        let schema_str = r#"{
+        let schema_str = r#"
+        {
             "type": "record",
             "name": "NamespacedMessage",
             [NAMESPACE]
             "fields": [
                 {
-                    "type": "record",
                     "name": "field_a",
-                    "fields": [
-                        {
-                            "name": "enum_a",
-                            "type": {
+                    "type": {
+                        "type": "record",
+                        "name": "NestedMessage",
+                        "fields": [
+                            {
+                                "name": "enum_a",
+                                "type": {
                                 "type": "enum",
                                 "name": "EnumType",
-                                "symbols": [
-                                    "SYMBOL_1",
-                                    "SYMBOL_2"
-                                ],
+                                "symbols": ["SYMBOL_1", "SYMBOL_2"],
                                 "default": "SYMBOL_1"
+                                }
+                            },
+                            {
+                                "name": "enum_b",
+                                "type": "EnumType"
                             }
-                        },
-                        {
-                            "name": "enum_b",
-                            "type": "EnumType"
-                        }
-                    ]
+                        ]
+                    }
                 }
             ]
-        }"#;
+        }
+        "#;
         let schema_str = schema_str.replace(
             "[NAMESPACE]",
             if with_namespace {

Reply via email to