This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch uuid in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 25c965e00e4062d0b819dc126938e34401087557 Author: Kriskras99 <[email protected]> AuthorDate: Sun Nov 16 07:59:27 2025 +0100 feat: Rework `Schema::Uuid` to support `fixed` as the base type --- avro/src/decode.rs | 93 ++++++++++++++------ avro/src/encode.rs | 23 +++-- avro/src/schema.rs | 146 ++++++++++++++++++++++++------- avro/src/schema_compatibility.rs | 110 +++++++++++++++++++---- avro/src/schema_equality.rs | 69 ++++++++++++++- avro/src/ser_schema.rs | 35 ++++---- avro/src/types.rs | 93 ++++++++++++++------ avro/tests/serde_human_readable_false.rs | 4 +- avro/tests/serde_human_readable_true.rs | 3 +- 9 files changed, 443 insertions(+), 133 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index 5bdd657..9c13c8e 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::InnerDecimalSchema; +use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ AvroResult, Error, bigdecimal::deserialize_big_decimal, @@ -83,7 +83,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( enclosing_namespace: &Namespace, reader: &mut R, ) -> AvroResult<Value> { - match *schema { + match schema { Schema::Null => Ok(Value::Null), Schema::Boolean => { let mut buf = [0u8; 1]; @@ -102,7 +102,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( } } } - Schema::Decimal(DecimalSchema { ref inner, .. }) => match inner { + Schema::Decimal(DecimalSchema { inner, .. }) => match inner { InnerDecimalSchema::Fixed(fixed) => { match decode_internal( &Schema::Fixed(fixed.copy_only_size()), @@ -127,20 +127,41 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( value => Err(Details::BytesValue(value).into()), } } - Schema::Uuid => { + Schema::Uuid(UuidSchema::String) => { + let Value::String(string) = + decode_internal(&Schema::String, names, enclosing_namespace, reader)? 
+ else { + // Calling decode_internal with Schema::String can only return a Value::String or an error + unreachable!() + }; + let uuid = Uuid::parse_str(&string).map_err(Details::ConvertStrToUuid)?; + Ok(Value::Uuid(uuid)) + } + Schema::Uuid(UuidSchema::Bytes) => { let Value::Bytes(bytes) = decode_internal(&Schema::Bytes, names, enclosing_namespace, reader)? else { // Calling decode_internal with Schema::Bytes can only return a Value::Bytes or an error - unreachable!(); + unreachable!() }; - - let uuid = if bytes.len() == 16 { - Uuid::from_slice(&bytes).map_err(Details::ConvertSliceToUuid)? - } else { - let string = std::str::from_utf8(&bytes).map_err(Details::ConvertToUtf8Error)?; - Uuid::parse_str(string).map_err(Details::ConvertStrToUuid)? + let uuid = Uuid::from_slice(&bytes).map_err(Details::ConvertSliceToUuid)?; + Ok(Value::Uuid(uuid)) + } + Schema::Uuid(UuidSchema::Fixed(fixed)) => { + let Value::Fixed(n, bytes) = decode_internal( + &Schema::Fixed(fixed.copy_only_size()), + names, + enclosing_namespace, + reader, + )? + else { + // Calling decode_internal with Schema::Fixed can only return a Value::Fixed or an error + unreachable!() }; + if n != 16 { + return Err(Details::ConvertFixedToUuid(n).into()); + } + let uuid = Uuid::from_slice(&bytes).map_err(Details::ConvertSliceToUuid)?; Ok(Value::Uuid(uuid)) } Schema::Int => decode_int(reader), @@ -196,13 +217,13 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( } } Schema::Fixed(FixedSchema { size, .. 
}) => { - let mut buf = vec![0u8; size]; + let mut buf = vec![0u8; *size]; reader .read_exact(&mut buf) - .map_err(|e| Details::ReadFixed(e, size))?; - Ok(Value::Fixed(size, buf)) + .map_err(|e| Details::ReadFixed(e, *size))?; + Ok(Value::Fixed(*size, buf)) } - Schema::Array(ref inner) => { + Schema::Array(inner) => { let mut items = Vec::new(); loop { @@ -224,7 +245,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( Ok(Value::Array(items)) } - Schema::Map(ref inner) => { + Schema::Map(inner) => { let mut items = HashMap::new(); loop { @@ -248,7 +269,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( Ok(Value::Map(items)) } - Schema::Union(ref inner) => match zag_i64(reader).map_err(Error::into_details) { + Schema::Union(inner) => match zag_i64(reader).map_err(Error::into_details) { Ok(index) => { let variants = inner.variants(); let variant = variants @@ -269,11 +290,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( } Err(io_err) => Err(Error::new(io_err)), }, - Schema::Record(RecordSchema { - ref name, - ref fields, - .. - }) => { + Schema::Record(RecordSchema { name, fields, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); // Benchmarks indicate ~10% improvement using this method. let mut items = Vec::with_capacity(fields.len()); @@ -291,7 +308,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( } Ok(Value::Record(items)) } - Schema::Enum(EnumSchema { ref symbols, .. }) => { + Schema::Enum(EnumSchema { symbols, .. }) => { Ok(if let Value::Int(raw_index) = decode_int(reader)? 
{ let index = usize::try_from(raw_index) .map_err(|e| Details::ConvertI32ToUsize(e, raw_index))?; @@ -309,7 +326,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( return Err(Details::GetEnumUnknownIndexValue.into()); }) } - Schema::Ref { ref name } => { + Schema::Ref { name } => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if let Some(resolved) = names.get(&fully_qualified_name) { decode_internal( @@ -328,7 +345,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( #[cfg(test)] #[allow(clippy::expect_fun_call)] mod tests { - use crate::schema::InnerDecimalSchema; + use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ Decimal, decode::decode, @@ -850,7 +867,7 @@ mod tests { let mut buffer = Vec::new(); encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); - let result = decode(&Schema::Uuid, &mut &buffer[..])?; + let result = decode(&Schema::Uuid(UuidSchema::String), &mut &buffer[..])?; assert_eq!(result, value); Ok(()) @@ -860,20 +877,38 @@ mod tests { fn avro_3926_encode_decode_uuid_to_fixed() -> TestResult { use crate::encode::encode; - let schema = Schema::Fixed(FixedSchema { + let fixed = FixedSchema { size: 16, name: "uuid".into(), aliases: None, doc: None, default: None, attributes: Default::default(), - }); + }; + + let schema = Schema::Fixed(fixed.clone()); + let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); + + let mut buffer = Vec::new(); + encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); + + let result = decode(&Schema::Uuid(UuidSchema::Fixed(fixed)), &mut &buffer[..])?; + assert_eq!(result, value); + + Ok(()) + } + + #[test] + fn encode_decode_uuid_to_bytes() -> TestResult { + use crate::encode::encode; + + let schema = Schema::Bytes; let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); let mut buffer = Vec::new(); encode(&value, &schema, &mut buffer).expect(&success(&value, 
&schema)); - let result = decode(&Schema::Uuid, &mut &buffer[..])?; + let result = decode(&Schema::Uuid(UuidSchema::Bytes), &mut &buffer[..])?; assert_eq!(result, value); Ok(()) diff --git a/avro/src/encode.rs b/avro/src/encode.rs index 894b033..c67f021 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::schema::InnerDecimalSchema; +use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ AvroResult, bigdecimal::serialize_big_decimal, @@ -144,23 +144,30 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( .map_err(|e| Details::WriteBytes(e).into()) } Value::Uuid(uuid) => match *schema { - Schema::Uuid | Schema::String => encode_bytes( + Schema::Uuid(UuidSchema::String) | Schema::String => encode_bytes( // we need the call .to_string() to properly convert ASCII to UTF-8 #[allow(clippy::unnecessary_to_owned)] &uuid.to_string(), writer, ), - Schema::Fixed(FixedSchema { size, .. }) => { + Schema::Uuid(UuidSchema::Bytes) | Schema::Bytes => { + let bytes = uuid.as_bytes(); + encode_bytes(bytes, writer) + } + Schema::Uuid(UuidSchema::Fixed(FixedSchema { size, .. })) + | Schema::Fixed(FixedSchema { size, .. 
}) => { if size != 16 { return Err(Details::ConvertFixedToUuid(size).into()); } let bytes = uuid.as_bytes(); - encode_bytes(bytes, writer) + writer + .write(bytes.as_slice()) + .map_err(|e| Details::WriteBytes(e).into()) } _ => Err(Details::EncodeValueAsSchemaError { value_kind: ValueKind::Uuid, - supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed], + supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed, SchemaKind::Bytes], } .into()), }, @@ -171,7 +178,7 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( .map_err(|e| Details::WriteBytes(e).into()) } Value::Bytes(bytes) => match *schema { - Schema::Bytes => encode_bytes(bytes, writer), + Schema::Bytes | Schema::Uuid(UuidSchema::Bytes) => encode_bytes(bytes, writer), Schema::Fixed { .. } => writer .write(bytes.as_slice()) .map_err(|e| Details::WriteBytes(e).into()), @@ -182,7 +189,7 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( .into()), }, Value::String(s) => match *schema { - Schema::String | Schema::Uuid => encode_bytes(s, writer), + Schema::String | Schema::Uuid(UuidSchema::String) => encode_bytes(s, writer), Schema::Enum(EnumSchema { ref symbols, .. }) => { if let Some(index) = symbols.iter().position(|item| item == s) { encode_int(index as i32, writer) @@ -945,7 +952,7 @@ pub(crate) mod tests { #[test] fn test_avro_3585_encode_uuids() { let value = Value::String(String::from("00000000-0000-0000-0000-000000000000")); - let schema = Schema::Uuid; + let schema = Schema::Uuid(UuidSchema::String); let mut buffer = Vec::new(); let encoded = encode(&value, &schema, &mut buffer); assert!(encoded.is_ok()); diff --git a/avro/src/schema.rs b/avro/src/schema.rs index 84aa3d5..fae74be 100644 --- a/avro/src/schema.rs +++ b/avro/src/schema.rs @@ -111,7 +111,7 @@ pub enum Schema { /// The underlying type is serialized and deserialized as `Schema::Bytes` BigDecimal, /// A universally unique identifier, annotating a string. 
- Uuid, + Uuid(UuidSchema), /// Logical type which represents the number of days since the unix epoch. /// Serialization format is `Schema::Int`. Date, @@ -933,6 +933,20 @@ impl TryFrom<Schema> for InnerDecimalSchema { } } +/// The inner schema of the Uuid type. +#[derive(Debug, Clone)] +pub enum UuidSchema { + /// [`Schema::Bytes`] with size of 16. + /// + /// This is not according to specification, but was what happened in `0.21.0` and earlier when + /// a schema with logical type `uuid` and inner type `fixed` was used. + Bytes, + /// [`Schema::String`]. + String, + /// [`Schema::Fixed`] with size of 16. + Fixed(FixedSchema), +} + /// A description of a Union schema #[derive(Debug, Clone)] pub struct UnionSchema { @@ -1235,7 +1249,12 @@ impl Schema { | Schema::Enum(EnumSchema { attributes, .. }) | Schema::Fixed(FixedSchema { attributes, .. }) | Schema::Array(ArraySchema { attributes, .. }) - | Schema::Map(MapSchema { attributes, .. }) => Some(attributes), + | Schema::Map(MapSchema { attributes, .. }) + | Schema::Decimal(DecimalSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { attributes, .. }), + .. + }) + | Schema::Uuid(UuidSchema::Fixed(FixedSchema { attributes, .. })) => Some(attributes), _ => None, } } @@ -1246,7 +1265,12 @@ impl Schema { Schema::Ref { name, .. } | Schema::Record(RecordSchema { name, .. }) | Schema::Enum(EnumSchema { name, .. }) - | Schema::Fixed(FixedSchema { name, .. }) => Some(name), + | Schema::Fixed(FixedSchema { name, .. }) + | Schema::Decimal(DecimalSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { name, .. }), + .. + }) + | Schema::Uuid(UuidSchema::Fixed(FixedSchema { name, .. })) => Some(name), _ => None, } } @@ -1261,7 +1285,12 @@ impl Schema { match self { Schema::Record(RecordSchema { aliases, .. }) | Schema::Enum(EnumSchema { aliases, .. }) - | Schema::Fixed(FixedSchema { aliases, .. 
}) + | Schema::Decimal(DecimalSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { aliases, .. }), + .. + }) + | Schema::Uuid(UuidSchema::Fixed(FixedSchema { aliases, .. })) => aliases.as_ref(), _ => None, } } @@ -1271,7 +1300,12 @@ impl Schema { match self { Schema::Record(RecordSchema { doc, .. }) | Schema::Enum(EnumSchema { doc, .. }) - | Schema::Fixed(FixedSchema { doc, .. }) => doc.as_ref(), + | Schema::Fixed(FixedSchema { doc, .. }) + | Schema::Decimal(DecimalSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { doc, .. }), + .. + }) + | Schema::Uuid(UuidSchema::Fixed(FixedSchema { doc, .. })) => doc.as_ref(), _ => None, } } @@ -1605,16 +1639,19 @@ impl Parser { return try_convert_to_logical_type( "uuid", parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::String, SchemaKind::Fixed], + &[SchemaKind::String, SchemaKind::Fixed, SchemaKind::Bytes], |schema| match schema { - Schema::String => Ok(Schema::Uuid), - Schema::Fixed(FixedSchema { size: 16, .. }) => Ok(Schema::Uuid), + Schema::String => Ok(Schema::Uuid(UuidSchema::String)), + Schema::Fixed(fixed @ FixedSchema { size: 16, .. }) => { + Ok(Schema::Uuid(UuidSchema::Fixed(fixed))) + } Schema::Fixed(FixedSchema { size, .. }) => { warn!( "Ignoring uuid logical type for a Fixed schema because its size ({size:?}) is not 16! 
Schema: {schema:?}" ); Ok(schema) } + Schema::Bytes => Ok(Schema::Uuid(UuidSchema::Bytes)), _ => { warn!("Ignoring invalid uuid logical type for schema: {schema:?}"); Ok(schema) @@ -2113,8 +2150,8 @@ impl Serialize for Schema { where S: Serializer, { - match *self { - Schema::Ref { ref name } => serializer.serialize_str(&name.fullname(None)), + match &self { + Schema::Ref { name } => serializer.serialize_str(&name.fullname(None)), Schema::Null => serializer.serialize_str("null"), Schema::Boolean => serializer.serialize_str("boolean"), Schema::Int => serializer.serialize_str("int"), @@ -2123,7 +2160,7 @@ impl Serialize for Schema { Schema::Double => serializer.serialize_str("double"), Schema::Bytes => serializer.serialize_str("bytes"), Schema::String => serializer.serialize_str("string"), - Schema::Array(ref inner) => { + Schema::Array(inner) => { let mut map = serializer.serialize_map(Some(2 + inner.attributes.len()))?; map.serialize_entry("type", "array")?; map.serialize_entry("items", &*inner.items.clone())?; @@ -2132,7 +2169,7 @@ impl Serialize for Schema { } map.end() } - Schema::Map(ref inner) => { + Schema::Map(inner) => { let mut map = serializer.serialize_map(Some(2 + inner.attributes.len()))?; map.serialize_entry("type", "map")?; map.serialize_entry("values", &*inner.types.clone())?; @@ -2141,7 +2178,7 @@ impl Serialize for Schema { } map.end() } - Schema::Union(ref inner) => { + Schema::Union(inner) => { let variants = inner.variants(); let mut seq = serializer.serialize_seq(Some(variants.len()))?; for v in variants { @@ -2150,11 +2187,11 @@ impl Serialize for Schema { seq.end() } Schema::Record(RecordSchema { - ref name, - ref aliases, - ref doc, - ref fields, - ref attributes, + name, + aliases, + doc, + fields, + attributes, .. 
}) => { let mut map = serializer.serialize_map(None)?; @@ -2176,10 +2213,10 @@ impl Serialize for Schema { map.end() } Schema::Enum(EnumSchema { - ref name, - ref symbols, - ref aliases, - ref attributes, + name, + symbols, + aliases, + attributes, .. }) => { let mut map = serializer.serialize_map(None)?; @@ -2198,15 +2235,15 @@ impl Serialize for Schema { } map.end() } - Schema::Fixed(ref fixed_schema) => { + Schema::Fixed(fixed_schema) => { let mut map = serializer.serialize_map(None)?; map = fixed_schema.serialize_to_map::<S>(map)?; map.end() } Schema::Decimal(DecimalSchema { - ref scale, - ref precision, - ref inner, + scale, + precision, + inner, }) => { let mut map = serializer.serialize_map(None)?; match inner { @@ -2229,9 +2266,19 @@ impl Serialize for Schema { map.serialize_entry("logicalType", "big-decimal")?; map.end() } - Schema::Uuid => { + Schema::Uuid(inner) => { let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "string")?; + match inner { + UuidSchema::Bytes => { + map.serialize_entry("type", "bytes")?; + } + UuidSchema::String => { + map.serialize_entry("type", "string")?; + } + UuidSchema::Fixed(fixed_schema) => { + map = fixed_schema.serialize_to_map::<S>(map)?; + } + } map.serialize_entry("logicalType", "uuid")?; map.end() } @@ -2569,7 +2616,20 @@ pub mod derive { impl_schema!(f32, Schema::Float); impl_schema!(f64, Schema::Double); impl_schema!(String, Schema::String); - impl_schema!(uuid::Uuid, Schema::Uuid); + impl_schema!( + uuid::Uuid, + Schema::Uuid(UuidSchema::Fixed(FixedSchema { + name: Name { + name: String::new(), + namespace: None + }, + aliases: None, + doc: None, + size: 16, + default: None, + attributes: Default::default() + })) + ); impl_schema!(core::time::Duration, Schema::Duration); impl<T> AvroSchemaComponent for Vec<T> @@ -6728,7 +6788,7 @@ mod tests { "logicalType": "uuid" }); let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Uuid); + assert_eq!(parse_result, 
Schema::Uuid(UuidSchema::String)); Ok(()) } @@ -6743,7 +6803,17 @@ mod tests { "logicalType": "uuid" }); let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Uuid); + assert_eq!( + parse_result, + Schema::Uuid(UuidSchema::Fixed(FixedSchema { + name: Name::new("FixedUUID")?, + aliases: None, + doc: None, + size: 16, + default: None, + attributes: Default::default(), + })) + ); assert_not_logged( r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#, ); @@ -6751,6 +6821,20 @@ mod tests { Ok(()) } + #[test] + fn uuid_schema_bytes() -> TestResult { + let schema = json!( + { + "type": "bytes", + "name": "BytesUUID", + "logicalType": "uuid" + }); + let parse_result = Schema::parse(&schema)?; + assert_eq!(parse_result, Schema::Uuid(UuidSchema::Bytes)); + + Ok(()) + } + #[test] fn avro_3926_uuid_schema_for_fixed_with_size_different_than_16() -> TestResult { let schema = json!( diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index be75b91..9491073 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -16,6 +16,7 @@ // under the License. //! 
Logic for checking schema compatibility +use crate::schema::UuidSchema; use crate::{ error::CompatibilityError, schema::{EnumSchema, FixedSchema, RecordSchema, Schema, SchemaKind}, @@ -426,13 +427,48 @@ impl SchemaCompatibility { } } } - SchemaKind::Uuid => { - return check_writer_type( - writers_schema, - readers_schema, - vec![r_type, SchemaKind::String], - ); - } + SchemaKind::Uuid => match readers_schema { + Schema::Uuid(UuidSchema::Bytes) => { + return check_writer_type( + writers_schema, + readers_schema, + vec![r_type, SchemaKind::Bytes], + ); + } + Schema::Uuid(UuidSchema::String) => { + return check_writer_type( + writers_schema, + readers_schema, + vec![r_type, SchemaKind::String], + ); + } + Schema::Uuid(UuidSchema::Fixed(FixedSchema { + name: w_name, + size: w_size, + .. + })) => { + if let Schema::Uuid(UuidSchema::Fixed(FixedSchema { + name: r_name, + size: r_size, + .. + })) = readers_schema + { + return (w_name.name == r_name.name && w_size == r_size) + .then_some(()) + .ok_or(CompatibilityError::FixedMismatch); + } else if let Schema::Fixed(FixedSchema { + name: r_name, + size: r_size, + .. 
+ }) = readers_schema + { + return (w_name.name == r_name.name && w_size == r_size) + .then_some(()) + .ok_or(CompatibilityError::FixedMismatch); + } + } + _ => unreachable!(), + }, SchemaKind::Date | SchemaKind::TimeMillis => { return check_writer_type( writers_schema, @@ -499,8 +535,15 @@ impl SchemaCompatibility { SchemaKind::String => { check_reader_type_multi(r_type, vec![SchemaKind::Bytes, SchemaKind::Uuid], w_type) } - SchemaKind::Bytes => check_reader_type(r_type, SchemaKind::String, w_type), - SchemaKind::Uuid => check_reader_type(r_type, SchemaKind::String, w_type), + SchemaKind::Bytes => { + check_reader_type_multi(r_type, vec![SchemaKind::String, SchemaKind::Uuid], w_type) + } + SchemaKind::Uuid => check_reader_type_multi( + r_type, + vec![SchemaKind::String, SchemaKind::Bytes, SchemaKind::Fixed], + w_type, + ), + SchemaKind::Fixed => check_reader_type(r_type, SchemaKind::Uuid, w_type), SchemaKind::Date | SchemaKind::TimeMillis => { check_reader_type(r_type, SchemaKind::Int, w_type) } @@ -523,6 +566,7 @@ impl SchemaCompatibility { #[cfg(test)] mod tests { use super::*; + use crate::schema::{Name, UuidSchema}; use crate::{ Codec, Reader, Writer, types::{Record, Value}, @@ -1006,6 +1050,18 @@ mod tests { #[test] fn test_compatible_reader_writer_pairs() { + let uuid_fixed = FixedSchema { + name: Name { + name: String::new(), + namespace: None, + }, + aliases: None, + doc: None, + size: 16, + default: None, + attributes: Default::default(), + }; + let compatible_schemas = vec![ (Schema::Null, Schema::Null), (Schema::Long, Schema::Int), @@ -1017,8 +1073,30 @@ mod tests { (Schema::String, Schema::Bytes), (Schema::Bytes, Schema::String), // logical types - (Schema::Uuid, Schema::Uuid), - (Schema::Uuid, Schema::String), + ( + Schema::Uuid(UuidSchema::String), + Schema::Uuid(UuidSchema::String), + ), + (Schema::Uuid(UuidSchema::String), Schema::String), + (Schema::String, Schema::Uuid(UuidSchema::String)), + ( + Schema::Uuid(UuidSchema::Bytes), + 
Schema::Uuid(UuidSchema::Bytes), + ), + (Schema::Uuid(UuidSchema::Bytes), Schema::Bytes), + (Schema::Bytes, Schema::Uuid(UuidSchema::Bytes)), + ( + Schema::Uuid(UuidSchema::Fixed(uuid_fixed.clone())), + Schema::Uuid(UuidSchema::Fixed(uuid_fixed.clone())), + ), + ( + Schema::Uuid(UuidSchema::Fixed(uuid_fixed.clone())), + Schema::Fixed(uuid_fixed.clone()), + ), + ( + Schema::Fixed(uuid_fixed.clone()), + Schema::Uuid(UuidSchema::Fixed(uuid_fixed.clone())), + ), (Schema::Date, Schema::Int), (Schema::TimeMillis, Schema::Int), (Schema::TimeMicros, Schema::Long), @@ -1028,7 +1106,6 @@ mod tests { (Schema::LocalTimestampMillis, Schema::Long), (Schema::LocalTimestampMicros, Schema::Long), (Schema::LocalTimestampNanos, Schema::Long), - (Schema::String, Schema::Uuid), (Schema::Int, Schema::Date), (Schema::Int, Schema::TimeMillis), (Schema::Long, Schema::TimeMicros), @@ -1070,11 +1147,10 @@ mod tests { (nested_optional_record(), nested_record()), ]; - assert!( - compatible_schemas - .iter() - .all(|(reader, writer)| SchemaCompatibility::can_read(writer, reader).is_ok()) - ); + for (reader, writer) in compatible_schemas { + println!("{reader:?}, {writer:?}"); + SchemaCompatibility::can_read(&writer, &reader).unwrap(); + } } fn writer_schema() -> Schema { diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index 20c114e..d615c4f 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::schema::InnerDecimalSchema; +use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ Schema, schema::{ @@ -84,8 +84,6 @@ impl SchemataEq for StructFieldEq { (Schema::Bytes, _) => false, (Schema::String, Schema::String) => true, (Schema::String, _) => false, - (Schema::Uuid, Schema::Uuid) => true, - (Schema::Uuid, _) => false, (Schema::BigDecimal, Schema::BigDecimal) => true, (Schema::BigDecimal, _) => false, (Schema::Date, Schema::Date) => true, @@ -153,6 +151,14 @@ impl SchemataEq for StructFieldEq { } } (Schema::Decimal(_), _) => false, + (Schema::Uuid(UuidSchema::Bytes), Schema::Uuid(UuidSchema::Bytes)) => true, + (Schema::Uuid(UuidSchema::Bytes), _) => false, + (Schema::Uuid(UuidSchema::String), Schema::Uuid(UuidSchema::String)) => true, + (Schema::Uuid(UuidSchema::String), _) => false, + (Schema::Uuid(UuidSchema::Fixed(FixedSchema { size: size_one, ..})), Schema::Uuid(UuidSchema::Fixed(FixedSchema { size: size_two, ..}))) => { + size_one == size_two + }, + (Schema::Uuid(UuidSchema::Fixed(_)), _) => false, ( Schema::Array(ArraySchema { items: items_one, ..}), Schema::Array(ArraySchema { items: items_two, ..}) @@ -250,7 +256,6 @@ mod tests { test_primitives!(Double); test_primitives!(Bytes); test_primitives!(String); - test_primitives!(Uuid); test_primitives!(BigDecimal); test_primitives!(Date); test_primitives!(Duration); @@ -551,4 +556,60 @@ mod tests { assert_eq!(specification_eq_res, struct_field_eq_res); Ok(()) } + + #[test] + fn test_uuid_compare_uuid() -> TestResult { + let string = Schema::Uuid(UuidSchema::String); + let bytes = Schema::Uuid(UuidSchema::Bytes); + let mut fixed_schema = FixedSchema { + name: Name { + name: String::new(), + namespace: None, + }, + aliases: None, + doc: None, + size: 16, + default: None, + attributes: Default::default(), + }; + let fixed = Schema::Fixed(fixed_schema.clone()); + fixed_schema + .attributes + .insert("Something".to_string(), Value::Null); + let fixed_different = 
Schema::Fixed(fixed_schema); + + assert!(SPECIFICATION_EQ.compare(&string, &string)); + assert!(STRUCT_FIELD_EQ.compare(&string, &string)); + assert!(SPECIFICATION_EQ.compare(&bytes, &bytes)); + assert!(STRUCT_FIELD_EQ.compare(&bytes, &bytes)); + assert!(SPECIFICATION_EQ.compare(&fixed, &fixed)); + assert!(STRUCT_FIELD_EQ.compare(&fixed, &fixed)); + + assert!(!SPECIFICATION_EQ.compare(&string, &bytes)); + assert!(!STRUCT_FIELD_EQ.compare(&string, &bytes)); + assert!(!SPECIFICATION_EQ.compare(&bytes, &string)); + assert!(!STRUCT_FIELD_EQ.compare(&bytes, &string)); + assert!(!SPECIFICATION_EQ.compare(&string, &fixed)); + assert!(!STRUCT_FIELD_EQ.compare(&string, &fixed)); + assert!(!SPECIFICATION_EQ.compare(&fixed, &string)); + assert!(!STRUCT_FIELD_EQ.compare(&fixed, &string)); + assert!(!SPECIFICATION_EQ.compare(&bytes, &fixed)); + assert!(!STRUCT_FIELD_EQ.compare(&bytes, &fixed)); + assert!(!SPECIFICATION_EQ.compare(&fixed, &bytes)); + assert!(!STRUCT_FIELD_EQ.compare(&fixed, &bytes)); + + assert!(SPECIFICATION_EQ.compare(&fixed, &fixed_different)); + assert!(STRUCT_FIELD_EQ.compare(&fixed, &fixed_different)); + assert!(SPECIFICATION_EQ.compare(&fixed_different, &fixed)); + assert!(STRUCT_FIELD_EQ.compare(&fixed_different, &fixed)); + + let strict = StructFieldEq { + include_attributes: true, + }; + + assert!(!strict.compare(&fixed, &fixed_different)); + assert!(!strict.compare(&fixed_different, &fixed)); + + Ok(()) + } } diff --git a/avro/src/ser_schema.rs b/avro/src/ser_schema.rs index 87fbf8e..2816b05 100644 --- a/avro/src/ser_schema.rs +++ b/avro/src/ser_schema.rs @@ -18,7 +18,7 @@ //! Logic for serde-compatible schema-aware serialization //! 
which writes directly to a `Write` stream -use crate::schema::InnerDecimalSchema; +use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ bigdecimal::big_decimal_as_bytes, encode::{encode_int, encode_long}, @@ -886,7 +886,9 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { }; match schema { - Schema::String | Schema::Bytes | Schema::Uuid => self.write_bytes(value.as_bytes()), + Schema::String | Schema::Bytes | Schema::Uuid(UuidSchema::String) => { + self.write_bytes(value.as_bytes()) + } Schema::BigDecimal => { // If we get a string for a `BigDecimal` type, expect a display string representation, such as "12.75" let decimal_val = @@ -916,7 +918,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { match variant_schema { Schema::String | Schema::Bytes - | Schema::Uuid + | Schema::Uuid(UuidSchema::String) | Schema::Fixed(_) | Schema::Ref { name: _ } => { encode_int(i as i32, &mut *self.writer)?; @@ -955,10 +957,11 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { }; match schema { - Schema::String | Schema::Bytes | Schema::Uuid | Schema::BigDecimal => { - self.write_bytes(value) - } - Schema::Fixed(fixed_schema) => { + Schema::String + | Schema::Bytes + | Schema::Uuid(UuidSchema::Bytes) + | Schema::BigDecimal => self.write_bytes(value), + Schema::Fixed(fixed_schema) | Schema::Uuid(UuidSchema::Fixed(fixed_schema)) => { if value.len() == fixed_schema.size { self.writer .write(value) @@ -1017,7 +1020,7 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { match variant_schema { Schema::String | Schema::Bytes - | Schema::Uuid + | Schema::Uuid(UuidSchema::Bytes) | Schema::BigDecimal | Schema::Fixed(_) | Schema::Duration @@ -2440,8 +2443,10 @@ mod tests { fn test_serialize_uuid() -> TestResult { let schema = Schema::parse_str( r#"{ - "type": "string", - "logicalType": "uuid" + "type": "fixed", + "size": 16, + "logicalType": "uuid", + "name": "FixedUuid" }"#, )?; @@ -2470,7 +2475,7 @@ mod tests { assert_eq!( buffer.as_slice(), &[ - 32, 
140, 40, 218, 129, 35, 140, 67, 38, 189, 221, 78, 61, 0, 204, 80, 153 + 140, 40, 218, 129, 35, 140, 67, 38, 189, 221, 78, 61, 0, 204, 80, 153 ] ); @@ -2761,10 +2766,10 @@ mod tests { assert_eq!( buffer.as_slice(), &[ - 8, 116, 101, 115, 116, 20, 10, 6, 0, 195, 104, 4, 32, 140, 40, 218, 129, 35, 140, - 67, 38, 189, 221, 78, 61, 0, 204, 80, 152, 2, 20, 105, 110, 110, 101, 114, 95, 116, - 101, 115, 116, 200, 1, 8, 4, 78, 70, 4, 32, 140, 40, 218, 129, 35, 140, 67, 38, - 189, 221, 78, 61, 0, 204, 80, 153, 0 + 8, 116, 101, 115, 116, 20, 10, 6, 0, 195, 104, 4, 140, 40, 218, 129, 35, 140, 67, + 38, 189, 221, 78, 61, 0, 204, 80, 152, 2, 20, 105, 110, 110, 101, 114, 95, 116, + 101, 115, 116, 200, 1, 8, 4, 78, 70, 4, 140, 40, 218, 129, 35, 140, 67, 38, 189, + 221, 78, 61, 0, 204, 80, 153, 0 ] ); diff --git a/avro/src/types.rs b/avro/src/types.rs index ddd9781..c14bd20 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -16,7 +16,7 @@ // under the License. //! Logic handling the intermediate representation of Avro values. -use crate::schema::InnerDecimalSchema; +use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ AvroResult, Error, bigdecimal::{deserialize_big_decimal, serialize_big_decimal}, @@ -442,14 +442,15 @@ impl Value { (&Value::Decimal(_), &Schema::Decimal { .. }) => None, (&Value::BigDecimal(_), &Schema::BigDecimal) => None, (&Value::Duration(_), &Schema::Duration) => None, - (&Value::Uuid(_), &Schema::Uuid) => None, + (&Value::Uuid(_), &Schema::Uuid(_)) => None, (&Value::Float(_), &Schema::Float) => None, (&Value::Float(_), &Schema::Double) => None, (&Value::Double(_), &Schema::Double) => None, (&Value::Bytes(_), &Schema::Bytes) => None, (&Value::Bytes(_), &Schema::Decimal { .. 
}) => None, + (&Value::Bytes(_), &Schema::Uuid(UuidSchema::Bytes)) => None, (&Value::String(_), &Schema::String) => None, - (&Value::String(_), &Schema::Uuid) => None, + (&Value::String(_), &Schema::Uuid(UuidSchema::String)) => None, (&Value::Fixed(n, _), &Schema::Fixed(FixedSchema { size, .. })) => { if n != size { Some(format!( @@ -479,6 +480,15 @@ impl Value { None } } + (&Value::Fixed(n, _), &Schema::Uuid(UuidSchema::Fixed(_))) => { + if n != 16 { + Some(format!( + "The value's size ('{n}') must be exactly 16 to be a Uuid" + )) + } else { + None + } + } // TODO: check precision against n (&Value::Fixed(_n, _), &Schema::Decimal { .. }) => None, (Value::String(s), Schema::Enum(EnumSchema { symbols, .. })) => { @@ -654,8 +664,8 @@ impl Value { }; self = v; } - match *schema { - Schema::Ref { ref name } => { + match schema { + Schema::Ref { name } => { let name = name.fully_qualified_name(enclosing_namespace); if let Some(resolved) = names.get(&name) { @@ -674,27 +684,23 @@ impl Value { Schema::Double => self.resolve_double(), Schema::Bytes => self.resolve_bytes(), Schema::String => self.resolve_string(), - Schema::Fixed(FixedSchema { size, .. }) => self.resolve_fixed(size), - Schema::Union(ref inner) => { + Schema::Fixed(FixedSchema { size, .. }) => self.resolve_fixed(*size), + Schema::Union(inner) => { self.resolve_union(inner, names, enclosing_namespace, field_default) } Schema::Enum(EnumSchema { - ref symbols, - ref default, - .. + symbols, default, .. }) => self.resolve_enum(symbols, default, field_default), - Schema::Array(ref inner) => { - self.resolve_array(&inner.items, names, enclosing_namespace) - } - Schema::Map(ref inner) => self.resolve_map(&inner.types, names, enclosing_namespace), - Schema::Record(RecordSchema { ref fields, .. }) => { + Schema::Array(inner) => self.resolve_array(&inner.items, names, enclosing_namespace), + Schema::Map(inner) => self.resolve_map(&inner.types, names, enclosing_namespace), + Schema::Record(RecordSchema { fields, .. 
}) => { self.resolve_record(fields, names, enclosing_namespace) } Schema::Decimal(DecimalSchema { scale, precision, - ref inner, - }) => self.resolve_decimal(precision, scale, inner), + inner, + }) => self.resolve_decimal(*precision, *scale, inner), Schema::BigDecimal => self.resolve_bigdecimal(), Schema::Date => self.resolve_date(), Schema::TimeMillis => self.resolve_time_millis(), @@ -706,18 +712,28 @@ impl Value { Schema::LocalTimestampMicros => self.resolve_local_timestamp_micros(), Schema::LocalTimestampNanos => self.resolve_local_timestamp_nanos(), Schema::Duration => self.resolve_duration(), - Schema::Uuid => self.resolve_uuid(), + Schema::Uuid(inner) => self.resolve_uuid(inner), } } - fn resolve_uuid(self) -> Result<Self, Error> { - Ok(match self { - uuid @ Value::Uuid(_) => uuid, - Value::String(ref string) => { + fn resolve_uuid(self, inner: &UuidSchema) -> Result<Self, Error> { + let value = match (self, inner) { + (uuid @ Value::Uuid(_), _) => uuid, + (Value::String(ref string), UuidSchema::String) => { Value::Uuid(Uuid::from_str(string).map_err(Details::ConvertStrToUuid)?) } - other => return Err(Details::GetUuid(other).into()), - }) + (Value::Bytes(ref bytes), UuidSchema::Bytes) => { + Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?) + } + (Value::Fixed(n, ref bytes), UuidSchema::Fixed(_)) => { + if n != 16 { + return Err(Details::ConvertFixedToUuid(n).into()); + } + Value::Uuid(Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?) 
+ } + (other, _) => return Err(Details::GetUuid(other).into()), + }; + Ok(value) } fn resolve_bigdecimal(self) -> Result<Self, Error> { @@ -1870,7 +1886,34 @@ Field with name '"b"' is not a member of the map items"#, #[test] fn resolve_uuid() -> TestResult { let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537")?); - assert!(value.clone().resolve(&Schema::Uuid).is_ok()); + assert!( + value + .clone() + .resolve(&Schema::Uuid(UuidSchema::String)) + .is_ok() + ); + assert!( + value + .clone() + .resolve(&Schema::Uuid(UuidSchema::Bytes)) + .is_ok() + ); + assert!( + value + .clone() + .resolve(&Schema::Uuid(UuidSchema::Fixed(FixedSchema { + name: Name { + name: String::new(), + namespace: None + }, + aliases: None, + doc: None, + size: 16, + default: None, + attributes: Default::default(), + }))) + .is_ok() + ); assert!(value.resolve(&Schema::TimestampMicros).is_err()); Ok(()) diff --git a/avro/tests/serde_human_readable_false.rs b/avro/tests/serde_human_readable_false.rs index 1a76ff5..3095657 100644 --- a/avro/tests/serde_human_readable_false.rs +++ b/avro/tests/serde_human_readable_false.rs @@ -35,11 +35,11 @@ fn avro_rs_53_uuid_with_fixed() -> TestResult { }; let mut buffer = Vec::new(); - // serialize the Uuid as Bytes + // serialize the Uuid as Fixed assert!(!apache_avro::util::set_serde_human_readable(false)); let bytes = SpecificSingleObjectWriter::<Comment>::with_capacity(64)? .write_ref(&payload, &mut buffer)?; - assert_eq!(bytes, 27); + assert_eq!(bytes, 26); Ok(()) } diff --git a/avro/tests/serde_human_readable_true.rs b/avro/tests/serde_human_readable_true.rs index b651b38..012dd7d 100644 --- a/avro/tests/serde_human_readable_true.rs +++ b/avro/tests/serde_human_readable_true.rs @@ -18,8 +18,7 @@ fn avro_rs_53_uuid_with_fixed_true() -> TestResult { "fields" : [ { "name" : "id", "type" : { - "type" : "fixed", - "size" : 16, + "type" : "string", "logicalType" : "uuid", "name": "FixedUUID" }
