This is an automated email from the ASF dual-hosted git repository.

kriskras99 pushed a commit to branch fix/uuid_bytes_vs_string
in repository https://gitbox.apache.org/repos/asf/avro-rs.git

commit 2d77a928f7b4e9f57bff7989768fb1cbd6d438ba Author: default <[email protected]> AuthorDate: Mon Jan 26 19:40:13 2026 +0000 fix: Don't allow serializing `bytes` as a `Uuid::String` --- avro/Cargo.toml | 1 + avro/src/serde/ser_schema.rs | 20 ++++++-- avro/tests/serde_human_readable_false.rs | 82 +++++++++++++++++++++++++++++++- avro/tests/serde_human_readable_true.rs | 77 +++++++++++++++++++++++++++++- 4 files changed, 173 insertions(+), 7 deletions(-) diff --git a/avro/Cargo.toml b/avro/Cargo.toml index 2cc9229..49c3c88 100644 --- a/avro/Cargo.toml +++ b/avro/Cargo.toml @@ -82,6 +82,7 @@ quad-rand = { default-features = false, version = "0.2.3" } rand = { default-features = false, version = "0.9.2", features = ["default"] } [dev-dependencies] +apache-avro-derive = { default-features = false, version = "0.22.0", path = "../avro_derive" } apache-avro-test-helper = { default-features = false, version = "0.22.0", path = "../avro_test_helper" } criterion = { default-features = false, version = "0.8.1" } hex-literal = { default-features = false, version = "1.1.0" } diff --git a/avro/src/serde/ser_schema.rs b/avro/src/serde/ser_schema.rs index 3ca05e1..d14ac97 100644 --- a/avro/src/serde/ser_schema.rs +++ b/avro/src/serde/ser_schema.rs @@ -1218,10 +1218,17 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { }; match schema { - Schema::String - | Schema::Bytes - | Schema::Uuid(UuidSchema::Bytes | UuidSchema::String) - | Schema::BigDecimal => self.write_bytes(value), + Schema::String | Schema::Bytes | Schema::BigDecimal => self.write_bytes(value), + Schema::Uuid(UuidSchema::Bytes) => { + if value.len() == 16 { + self.write_bytes(value) + } else { + Err(create_error(format!( + "Expected 16 bytes for `Schema::Uuid(Bytes) but got {} bytes", + value.len() + ))) + } + } Schema::Fixed(fixed_schema) | Schema::Uuid(UuidSchema::Fixed(fixed_schema)) => { if value.len() == fixed_schema.size { self.writer @@ -1282,7 +1289,6 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, 
W> { match variant_schema { Schema::String | Schema::Bytes - | Schema::Uuid(UuidSchema::Bytes | UuidSchema::String) | Schema::BigDecimal | Schema::Decimal(DecimalSchema { inner: InnerDecimalSchema::Bytes, @@ -1292,6 +1298,10 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { encode_int(i as i32, &mut *self.writer)?; return self.serialize_bytes_with_schema(value, variant_schema); } + Schema::Uuid(UuidSchema::Bytes) if value.len() == 16 => { + encode_int(i as i32, &mut *self.writer)?; + return self.serialize_bytes_with_schema(value, variant_schema); + } Schema::Fixed(fixed) | Schema::Uuid(UuidSchema::Fixed(fixed)) if fixed.size == value.len() => { diff --git a/avro/tests/serde_human_readable_false.rs b/avro/tests/serde_human_readable_false.rs index 8e37b04..71c463f 100644 --- a/avro/tests/serde_human_readable_false.rs +++ b/avro/tests/serde_human_readable_false.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{AvroSchema, Schema, SpecificSingleObjectWriter}; +use apache_avro::{AvroSchema, Schema, SpecificSingleObjectWriter, schema::UuidSchema}; use apache_avro_test_helper::TestResult; use serde::{Deserialize, Serialize}; +use uuid::Uuid; #[test] fn avro_rs_53_uuid_with_fixed() -> TestResult { @@ -60,3 +61,82 @@ fn avro_rs_53_uuid_with_fixed() -> TestResult { Ok(()) } + +#[test] +fn avro_rs_440_uuid_string() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + #[avro(with = || Schema::Uuid(UuidSchema::String))] + inner: Uuid, + } + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(!apache_avro::util::set_serde_human_readable(false)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + assert!(writer.write(uuid, &mut buffer).unwrap_err().to_string().contains("Failed to serialize value of type bytes using 
schema Uuid(String): 55e840e29b41d4a7164466554400. Cause: Expected String, Bytes, Uuid, BigDecimal, Fixed, Duration, Decimal, Ref or Union schema. Got: Uuid")); + + Ok(()) +} + +#[test] +fn avro_rs_440_uuid_bytes() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + #[avro(with = || Schema::Uuid(UuidSchema::Bytes))] + inner: Uuid, + } + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(!apache_avro::util::set_serde_human_readable(false)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + writer.write(uuid, &mut buffer)?; + + assert_eq!( + buffer.as_slice(), + &[ + 195, 1, 46, 208, 56, 148, 57, 0, 104, 249, 32, 85, 14, 132, 0, 226, 155, 65, 212, 167, + 22, 68, 102, 85, 68, 0, 0 + ][..] + ); + + Ok(()) +} + +#[test] +fn avro_rs_440_uuid_fixed() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + inner: Uuid, + } + assert!(matches!( + CustomUuid::get_schema(), + Schema::Uuid(UuidSchema::Fixed(_)) + )); + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(!apache_avro::util::set_serde_human_readable(false)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + writer.write(uuid, &mut buffer)?; + + assert_eq!( + buffer.as_slice(), + &[ + 195, 1, 22, 19, 155, 41, 216, 175, 73, 144, 85, 14, 132, 0, 226, 155, 65, 212, 167, 22, + 68, 102, 85, 68, 0, 0 + ][..] + ); + + Ok(()) +} diff --git a/avro/tests/serde_human_readable_true.rs b/avro/tests/serde_human_readable_true.rs index 75a2818..fbdafc7 100644 --- a/avro/tests/serde_human_readable_true.rs +++ b/avro/tests/serde_human_readable_true.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-use apache_avro::{AvroSchema, Schema, SpecificSingleObjectWriter}; +use apache_avro::{AvroSchema, Schema, SpecificSingleObjectWriter, schema::UuidSchema}; use apache_avro_test_helper::TestResult; use serde::{Deserialize, Serialize}; +use uuid::Uuid; #[test] fn avro_rs_53_uuid_with_string_true() -> TestResult { @@ -59,3 +60,77 @@ fn avro_rs_53_uuid_with_string_true() -> TestResult { Ok(()) } + +#[test] +fn avro_rs_440_uuid_string() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + #[avro(with = || Schema::Uuid(UuidSchema::String))] + inner: Uuid, + } + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(apache_avro::util::set_serde_human_readable(true)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + writer.write(uuid, &mut buffer)?; + + assert_eq!( + String::from_utf8_lossy(&buffer), + "�\u{1}'G�8�[\u{4}�H550e8400-e29b-41d4-a716-446655440000" + ); + + Ok(()) +} + +#[test] +fn avro_rs_440_uuid_bytes() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + #[avro(with = || Schema::Uuid(UuidSchema::Bytes))] + inner: Uuid, + } + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(apache_avro::util::set_serde_human_readable(true)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + assert_eq!( + writer.write(uuid, &mut buffer).unwrap_err().to_string(), + "Failed to serialize value of type string using schema Uuid(Bytes): 550e8400-e29b-41d4-a716-446655440000. Cause: Expected: Uuid. 
Got: String" + ); + + Ok(()) +} + +#[test] +fn avro_rs_440_uuid_fixed() -> TestResult { + #[derive(apache_avro_derive::AvroSchema, Serialize, Deserialize)] + #[serde(transparent)] + struct CustomUuid { + inner: Uuid, + } + assert!(matches!( + CustomUuid::get_schema(), + Schema::Uuid(UuidSchema::Fixed(_)) + )); + let uuid = CustomUuid { + inner: Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?, + }; + let mut buffer = Vec::new(); + + assert!(apache_avro::util::set_serde_human_readable(true)); + let mut writer = SpecificSingleObjectWriter::with_capacity(64)?; + assert_eq!( + writer.write(uuid, &mut buffer).unwrap_err().to_string(), + r#"Failed to serialize value of type string using schema Uuid(Fixed(FixedSchema { name: Name { name: "uuid", namespace: None }, aliases: None, doc: None, size: 16, default: None, attributes: {} })): 550e8400-e29b-41d4-a716-446655440000. Cause: Expected: Uuid. Got: String"# + ); + + Ok(()) +}
