This is an automated email from the ASF dual-hosted git repository.
kriskras99 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 23a9af7 refactor: Simplify the UUID decoding logic (#255)
23a9af7 is described below
commit 23a9af70b2db35d9bb30344efeb9012b487e5309
Author: Kriskras99 <[email protected]>
AuthorDate: Mon Aug 11 17:53:28 2025 +0200
refactor: Simplify the UUID decoding logic (#255)
In the original implementation we first read the length and then
read the remainder as `Fixed` if the length is 16 or as `String`
if it is not. It uses a temporary `Vec` to be able to backtrack if
`Fixed` is wrong.
The backtrack can only happen if it fails to read a `Fixed` or if
it gets a different type than `Fixed` from `decode_internal`. The
last case can never happen, as `decode_internal` always returns the
type corresponding to the schema. The first case can only happen if
there are not enough bytes, because any byte pattern is valid for
`Fixed`. If `Fixed` cannot read enough bytes, then `String` won't
be able to read enough bytes either.
I did not add a fallback from `Uuid::from_slice` to `Uuid::parse_str`
as a UUID string is at least 32 characters, so it would fail anyway.
---
avro/src/decode.rs | 63 ++++++++----------------------------------------------
1 file changed, 9 insertions(+), 54 deletions(-)
diff --git a/avro/src/decode.rs b/avro/src/decode.rs
index fc700ae..78fefbd 100644
--- a/avro/src/decode.rs
+++ b/avro/src/decode.rs
@@ -20,7 +20,6 @@ use crate::{
bigdecimal::deserialize_big_decimal,
decimal::Decimal,
duration::Duration,
- encode::encode_long,
error::Details,
schema::{
DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema,
ResolvedSchema,
@@ -33,7 +32,6 @@ use std::{
borrow::Borrow,
collections::HashMap,
io::{ErrorKind, Read},
- str::FromStr,
};
use uuid::Uuid;
@@ -123,61 +121,18 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>(
}
}
Schema::Uuid => {
- let len = decode_len(reader)?;
- let mut bytes = vec![0u8; len];
- reader
- .read_exact(&mut bytes)
- .map_err(Details::ReadIntoBuf)?;
-
- // use a Vec to be able re-read the bytes more than once if needed
- let mut reader = Vec::with_capacity(len + 1);
- encode_long(len as i64, &mut reader)?;
- reader.extend_from_slice(&bytes);
-
- let decode_from_string = |reader| match decode_internal(
- &Schema::String,
- names,
- enclosing_namespace,
- reader,
- )? {
- Value::String(ref s) => {
- Uuid::from_str(s).map_err(|e|
Details::ConvertStrToUuid(e).into())
- }
- value =>
Err(Error::new(Details::GetUuidFromStringValue(value))),
+ let Value::Bytes(bytes) =
+ decode_internal(&Schema::Bytes, names, enclosing_namespace,
reader)?
+ else {
+ // Calling decode_internal with Schema::Bytes can only return
a Value::Bytes or an error
+ unreachable!();
};
- let uuid: Uuid = if len == 16 {
- // most probably a Fixed schema
- let fixed_result = decode_internal(
- &Schema::Fixed(FixedSchema {
- size: 16,
- name: "uuid".into(),
- aliases: None,
- doc: None,
- default: None,
- attributes: Default::default(),
- }),
- names,
- enclosing_namespace,
- &mut bytes.as_slice(),
- );
- if fixed_result.is_ok() {
- match fixed_result? {
- Value::Fixed(ref size, ref bytes) => {
- if *size != 16 {
- return
Err(Details::ConvertFixedToUuid(*size).into());
- }
-
Uuid::from_slice(bytes).map_err(Details::ConvertSliceToUuid)?
- }
- _ => decode_from_string(&mut reader.as_slice())?,
- }
- } else {
- // try to decode as string
- decode_from_string(&mut reader.as_slice())?
- }
+ let uuid = if bytes.len() == 16 {
+ Uuid::from_slice(&bytes).map_err(Details::ConvertSliceToUuid)?
} else {
- // definitely a string
- decode_from_string(&mut reader.as_slice())?
+ let string =
std::str::from_utf8(&bytes).map_err(Details::ConvertToUtf8Error)?;
+ Uuid::parse_str(string).map_err(Details::ConvertStrToUuid)?
};
Ok(Value::Uuid(uuid))
}