This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 6484f21 fix: AVRO-4055: Validate records properly when parsing a
schema (#9)
6484f21 is described below
commit 6484f21bf938f9e7dd528f69329d85c82995b36a
Author: Santiago Fraire Willemoes <[email protected]>
AuthorDate: Fri Oct 4 14:39:29 2024 +0200
fix: AVRO-4055: Validate records properly when parsing a schema (#9)
fix: AVRO-4055: Validate records properly when parsing a schema
* Issue #9 - Rename ParseLocation to RecordSchemaParseLocation
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #9 - Check for other common/complex names (fixed & enum)
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---------
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
---
avro/src/error.rs | 3 ++
avro/src/schema.rs | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++---
avro/src/types.rs | 36 ++++++++++---------
3 files changed, 119 insertions(+), 22 deletions(-)
diff --git a/avro/src/error.rs b/avro/src/error.rs
index 09d458c..90f2522 100644
--- a/avro/src/error.rs
+++ b/avro/src/error.rs
@@ -350,6 +350,9 @@ pub enum Error {
#[error("Invalid namespace {0}. It must match the regex '{1}'")]
InvalidNamespace(String, &'static str),
+ #[error("Invalid schema: There is no type called '{0}', if you meant to
define a non-primitive schema, it should be defined inside `type` attribute.
Please review the specification")]
+ InvalidSchemaRecord(String),
+
#[error("Duplicate enum symbol {0}")]
EnumSymbolDuplicate(String),
diff --git a/avro/src/schema.rs b/avro/src/schema.rs
index 5a299d1..177cc3e 100644
--- a/avro/src/schema.rs
+++ b/avro/src/schema.rs
@@ -669,7 +669,11 @@ impl RecordField {
validate_record_field_name(&name)?;
// TODO: "type" = "<record name>"
- let schema = parser.parse_complex(field, &enclosing_record.namespace)?;
+ let schema = parser.parse_complex(
+ field,
+ &enclosing_record.namespace,
+ RecordSchemaParseLocation::FromField,
+ )?;
let default = field.get("default").cloned();
Self::resolve_default_value(
@@ -1006,6 +1010,16 @@ fn parse_json_integer_for_decimal(value:
&serde_json::Number) -> Result<DecimalM
})
}
+#[derive(Debug, Default)]
+enum RecordSchemaParseLocation {
+ /// When the parse is happening at root level
+ #[default]
+ Root,
+
+ /// When the parse is happening inside a record field
+ FromField,
+}
+
#[derive(Default)]
struct Parser {
input_schemas: HashMap<Name, Value>,
@@ -1232,7 +1246,9 @@ impl Parser {
fn parse(&mut self, value: &Value, enclosing_namespace: &Namespace) ->
AvroResult<Schema> {
match *value {
Value::String(ref t) => self.parse_known_schema(t.as_str(),
enclosing_namespace),
- Value::Object(ref data) => self.parse_complex(data,
enclosing_namespace),
+ Value::Object(ref data) => {
+ self.parse_complex(data, enclosing_namespace,
RecordSchemaParseLocation::Root)
+ }
Value::Array(ref data) => self.parse_union(data,
enclosing_namespace),
_ => Err(Error::ParseSchemaFromValidJson),
}
@@ -1294,6 +1310,14 @@ impl Parser {
return Ok(resolving_schema.clone());
}
+ // For good error reporting we add this check
+ match name.name.as_str() {
+ "record" | "enum" | "fixed" => {
+ return Err(Error::InvalidSchemaRecord(name.to_string()));
+ }
+ _ => (),
+ }
+
let value = self
.input_schemas
.remove(&fully_qualified_name)
@@ -1353,6 +1377,7 @@ impl Parser {
&mut self,
complex: &Map<String, Value>,
enclosing_namespace: &Namespace,
+ parse_location: RecordSchemaParseLocation,
) -> AvroResult<Schema> {
// Try to parse this as a native complex type.
fn parse_as_native_complex(
@@ -1542,14 +1567,23 @@ impl Parser {
}
match complex.get("type") {
Some(Value::String(t)) => match t.as_str() {
- "record" => self.parse_record(complex, enclosing_namespace),
+ "record" => match parse_location {
+ RecordSchemaParseLocation::Root => {
+ self.parse_record(complex, enclosing_namespace)
+ }
+ RecordSchemaParseLocation::FromField => {
+ self.fetch_schema_ref(t, enclosing_namespace)
+ }
+ },
"enum" => self.parse_enum(complex, enclosing_namespace),
"array" => self.parse_array(complex, enclosing_namespace),
"map" => self.parse_map(complex, enclosing_namespace),
"fixed" => self.parse_fixed(complex, enclosing_namespace),
other => self.parse_known_schema(other, enclosing_namespace),
},
- Some(Value::Object(data)) => self.parse_complex(data,
enclosing_namespace),
+ Some(Value::Object(data)) => {
+ self.parse_complex(data, enclosing_namespace,
RecordSchemaParseLocation::Root)
+ }
Some(Value::Array(variants)) => self.parse_union(variants,
enclosing_namespace),
Some(unknown) => Err(Error::GetComplexType(unknown.clone())),
None => Err(Error::GetComplexTypeField),
@@ -4932,7 +4966,7 @@ mod tests {
"type": "Bar"
}
]
- }
+ }
"#;
#[derive(
@@ -6821,4 +6855,62 @@ mod tests {
assert_eq!("92f2ccef718c6754", fp_rabin.to_string());
Ok(())
}
+
+ #[test]
+ fn avro_4055_should_fail_to_parse_invalid_schema() -> TestResult {
+ // This is invalid because the record type should be inside the type
field.
+ let invalid_schema_str = r#"
+ {
+ "type": "record",
+ "name": "SampleSchema",
+ "fields": [
+ {
+ "name": "order",
+ "type": "record",
+ "fields": [
+ {
+ "name": "order_number",
+ "type": ["null", "string"],
+ "default": null
+ },
+ { "name": "order_date", "type": "string" }
+ ]
+ }
+ ]
+ }"#;
+
+ let schema = Schema::parse_str(invalid_schema_str);
+ assert!(schema.is_err());
+ assert_eq!(
+ schema.unwrap_err().to_string(),
+ "Invalid schema: There is no type called 'record', if you meant to
define a non-primitive schema, it should be defined inside `type` attribute.
Please review the specification"
+ );
+
+ let valid_schema = r#"
+ {
+ "type": "record",
+ "name": "SampleSchema",
+ "fields": [
+ {
+ "name": "order",
+ "type": {
+ "type": "record",
+ "name": "Order",
+ "fields": [
+ {
+ "name": "order_number",
+ "type": ["null", "string"],
+ "default": null
+ },
+ { "name": "order_date", "type": "string" }
+ ]
+ }
+ }
+ ]
+ }"#;
+ let schema = Schema::parse_str(valid_schema);
+ assert!(schema.is_ok());
+
+ Ok(())
+ }
}
diff --git a/avro/src/types.rs b/avro/src/types.rs
index 0fb43f2..b4d61a9 100644
--- a/avro/src/types.rs
+++ b/avro/src/types.rs
@@ -2771,35 +2771,37 @@ Field with name '"b"' is not a member of the map
items"#,
use crate::ser::Serializer;
use serde::Serialize;
- let schema_str = r#"{
+ let schema_str = r#"
+ {
"type": "record",
"name": "NamespacedMessage",
[NAMESPACE]
"fields": [
{
- "type": "record",
"name": "field_a",
- "fields": [
- {
- "name": "enum_a",
- "type": {
+ "type": {
+ "type": "record",
+ "name": "NestedMessage",
+ "fields": [
+ {
+ "name": "enum_a",
+ "type": {
"type": "enum",
"name": "EnumType",
- "symbols": [
- "SYMBOL_1",
- "SYMBOL_2"
- ],
+ "symbols": ["SYMBOL_1", "SYMBOL_2"],
"default": "SYMBOL_1"
+ }
+ },
+ {
+ "name": "enum_b",
+ "type": "EnumType"
}
- },
- {
- "name": "enum_b",
- "type": "EnumType"
- }
- ]
+ ]
+ }
}
]
- }"#;
+ }
+ "#;
let schema_str = schema_str.replace(
"[NAMESPACE]",
if with_namespace {