This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch uuid in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 731cebf81a96ee61c15f36939204a7023a53ef05 Author: Kriskras99 <[email protected]> AuthorDate: Fri Nov 14 11:06:21 2025 +0100 feat: Rework `Schema::Decimal` to not require a `Box<Schema>` --- avro/src/decode.rs | 54 ++++++++++++++++++--------------- avro/src/encode.rs | 24 ++++++++++----- avro/src/schema.rs | 73 +++++++++++++++++++++++++++------------------ avro/src/schema_equality.rs | 15 +++++++--- avro/src/ser_schema.rs | 48 ++++++++++++++--------------- avro/src/types.rs | 18 +++++------ avro/src/writer.rs | 15 +++++----- 7 files changed, 142 insertions(+), 105 deletions(-) diff --git a/avro/src/decode.rs b/avro/src/decode.rs index 78fefbd..5bdd657 100644 --- a/avro/src/decode.rs +++ b/avro/src/decode.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::schema::InnerDecimalSchema; use crate::{ AvroResult, Error, bigdecimal::deserialize_big_decimal, @@ -101,18 +102,24 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( } } } - Schema::Decimal(DecimalSchema { ref inner, .. }) => match &**inner { - Schema::Fixed { .. } => { - match decode_internal(inner, names, enclosing_namespace, reader)? { + Schema::Decimal(DecimalSchema { ref inner, .. }) => match inner { + InnerDecimalSchema::Fixed(fixed) => { + match decode_internal( + &Schema::Fixed(fixed.copy_only_size()), + names, + enclosing_namespace, + reader, + )? { Value::Fixed(_, bytes) => Ok(Value::Decimal(Decimal::from(bytes))), value => Err(Details::FixedValue(value).into()), } } - Schema::Bytes => match decode_internal(inner, names, enclosing_namespace, reader)? { - Value::Bytes(bytes) => Ok(Value::Decimal(Decimal::from(bytes))), - value => Err(Details::BytesValue(value).into()), - }, - schema => Err(Details::ResolveDecimalSchema(schema.into()).into()), + InnerDecimalSchema::Bytes => { + match decode_internal(&Schema::Bytes, names, enclosing_namespace, reader)? { + Value::Bytes(bytes) => Ok(Value::Decimal(Decimal::from(bytes))), + value => Err(Details::BytesValue(value).into()), + } + } }, Schema::BigDecimal => { match decode_internal(&Schema::Bytes, names, enclosing_namespace, reader)? { @@ -321,6 +328,7 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>( #[cfg(test)] #[allow(clippy::expect_fun_call)] mod tests { + use crate::schema::InnerDecimalSchema; use crate::{ Decimal, decode::decode, @@ -380,14 +388,13 @@ mod tests { fn test_negative_decimal_value() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed( - FixedSchema::builder() - .name(Name::new("decimal")?) - .size(2) - .build(), - )); let schema = Schema::Decimal(DecimalSchema { - inner, + inner: InnerDecimalSchema::Fixed( + FixedSchema::builder() + .name(Name::new("decimal")?) + .size(2) + .build(), + ), precision: 4, scale: 2, }); @@ -408,16 +415,15 @@ mod tests { fn test_decode_decimal_with_bigger_than_necessary_size() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed(FixedSchema { - size: 13, - name: Name::new("decimal")?, - aliases: None, - doc: None, - default: None, - attributes: Default::default(), - })); let schema = Schema::Decimal(DecimalSchema { - inner, + inner: InnerDecimalSchema::Fixed(FixedSchema { + size: 13, + name: Name::new("decimal")?, + aliases: None, + doc: None, + default: None, + attributes: Default::default(), + }), precision: 4, scale: 2, }); diff --git a/avro/src/encode.rs b/avro/src/encode.rs index f19352c..894b033 100644 --- a/avro/src/encode.rs +++ b/avro/src/encode.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::schema::InnerDecimalSchema; use crate::{ AvroResult, bigdecimal::serialize_big_decimal, @@ -111,17 +112,24 @@ pub(crate) fn encode_internal<W: Write, S: Borrow<Schema>>( .write(&x.to_le_bytes()) .map_err(|e| Details::WriteBytes(e).into()), Value::Decimal(decimal) => match schema { - Schema::Decimal(DecimalSchema { inner, .. }) => match *inner.clone() { - Schema::Fixed(FixedSchema { size, .. }) => { - let bytes = decimal.to_sign_extended_bytes_with_len(size).unwrap(); + Schema::Decimal(DecimalSchema { inner, .. }) => match inner { + InnerDecimalSchema::Fixed(fixed) => { + let bytes = decimal.to_sign_extended_bytes_with_len(fixed.size)?; let num_bytes = bytes.len(); - if num_bytes != size { - return Err(Details::EncodeDecimalAsFixedError(num_bytes, size).into()); + if num_bytes != fixed.size { + return Err( + Details::EncodeDecimalAsFixedError(num_bytes, fixed.size).into() + ); } - encode(&Value::Fixed(size, bytes), inner, writer) + encode( + &Value::Fixed(fixed.size, bytes), + &Schema::Fixed(fixed.copy_only_size()), + writer, + ) + } + InnerDecimalSchema::Bytes => { + encode(&Value::Bytes(decimal.try_into()?), &Schema::Bytes, writer) } - Schema::Bytes => encode(&Value::Bytes(decimal.try_into()?), inner, writer), - _ => Err(Details::ResolveDecimalSchema(SchemaKind::from(*inner.clone())).into()), }, _ => Err(Details::EncodeValueAsSchemaError { value_kind: ValueKind::Decimal, diff --git a/avro/src/schema.rs b/avro/src/schema.rs index 8260cc2..84aa3d5 100644 --- a/avro/src/schema.rs +++ b/avro/src/schema.rs @@ -881,6 +881,23 @@ impl FixedSchema { Ok(map) } + + /// Create a new `FixedSchema` copying only the size. + /// + /// All other fields are `None` or empty. + pub(crate) fn copy_only_size(&self) -> Self { + Self { + name: Name { + name: String::new(), + namespace: None, + }, + aliases: None, + doc: None, + size: self.size, + default: None, + attributes: Default::default(), + } + } } /// A description of a Decimal schema. @@ -894,7 +911,26 @@ pub struct DecimalSchema { /// The number of digits to the right of the decimal point pub scale: DecimalMetadata, /// The inner schema of the decimal (fixed or bytes) - pub inner: Box<Schema>, + pub inner: InnerDecimalSchema, +} + +/// The inner schema of the Decimal type. +#[derive(Debug, Clone)] +pub enum InnerDecimalSchema { + Bytes, + Fixed(FixedSchema), +} + +impl TryFrom<Schema> for InnerDecimalSchema { + type Error = Error; + + fn try_from(value: Schema) -> Result<Self, Self::Error> { + match value { + Schema::Bytes => Ok(InnerDecimalSchema::Bytes), + Schema::Fixed(fixed) => Ok(InnerDecimalSchema::Fixed(fixed)), + _ => Err(Details::ResolveDecimalSchema(value.into()).into()), + } + } } /// A description of a Union schema @@ -1547,7 +1583,7 @@ impl Parser { Ok((precision, scale)) => Ok(Schema::Decimal(DecimalSchema { precision, scale, - inner: Box::new(inner), + inner: inner.try_into().unwrap_or_else(|_| unreachable!()), })), Err(err) => { warn!("Ignoring invalid decimal logical type: {err}"); @@ -2173,19 +2209,13 @@ impl Serialize for Schema { ref inner, }) => { let mut map = serializer.serialize_map(None)?; - match inner.as_ref() { - Schema::Fixed(fixed_schema) => { + match inner { + InnerDecimalSchema::Fixed(fixed_schema) => { map = fixed_schema.serialize_to_map::<S>(map)?; } - Schema::Bytes => { + InnerDecimalSchema::Bytes => { map.serialize_entry("type", "bytes")?; } - others => { - return Err(serde::ser::Error::custom(format!( - "DecimalSchema inner type must be Fixed or Bytes, got {:?}", - SchemaKind::from(others) - ))); - } } map.serialize_entry("logicalType", "decimal")?; map.serialize_entry("scale", scale)?; @@ -6883,14 +6913,14 @@ mod tests { let schema = Schema::Decimal(DecimalSchema { precision: 36, scale: 10, - inner: Box::new(Schema::Fixed(FixedSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { name: Name::new("decimal_36_10").unwrap(), aliases: None, doc: None, size: 16, default: None, attributes: Default::default(), - })), + }), }); let serialized_json = serde_json::to_string_pretty(&schema)?; @@ -6914,7 +6944,7 @@ mod tests { let schema = Schema::Decimal(DecimalSchema { precision: 36, scale: 10, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, }); let serialized_json = serde_json::to_string_pretty(&schema)?; @@ -6931,21 +6961,6 @@ mod tests { Ok(()) } - #[test] - fn test_avro_3925_serialize_decimal_inner_invalid() -> TestResult { - let schema = Schema::Decimal(DecimalSchema { - precision: 36, - scale: 10, - inner: Box::new(Schema::String), - }); - - let serialized_json = serde_json::to_string_pretty(&schema); - - assert!(serialized_json.is_err()); - - Ok(()) - } - #[test] fn test_avro_3927_serialize_array_with_custom_attributes() -> TestResult { let expected = Schema::array_with_attributes( diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index c14e5fb..20c114e 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::schema::InnerDecimalSchema; use crate::{ Schema, schema::{ @@ -143,7 +144,13 @@ impl SchemataEq for StructFieldEq { Schema::Decimal(DecimalSchema { precision: precision_one, scale: scale_one, inner: inner_one }), Schema::Decimal(DecimalSchema { precision: precision_two, scale: scale_two, inner: inner_two }) ) => { - precision_one == precision_two && scale_one == scale_two && self.compare(inner_one, inner_two) + precision_one == precision_two && scale_one == scale_two && match (inner_one, inner_two) { + (InnerDecimalSchema::Bytes, InnerDecimalSchema::Bytes) => true, + (InnerDecimalSchema::Fixed(FixedSchema { size: size_one, .. }), InnerDecimalSchema::Fixed(FixedSchema { size: size_two, ..})) => { + size_one == size_two + } + _ => false, + } } (Schema::Decimal(_), _) => false, ( @@ -212,7 +219,7 @@ pub(crate) fn compare_schemata(schema_one: &Schema, schema_two: &Schema) -> bool #[allow(non_snake_case)] mod tests { use super::*; - use crate::schema::{Name, RecordFieldOrder}; + use crate::schema::{InnerDecimalSchema, Name, RecordFieldOrder}; use apache_avro_test_helper::TestResult; use serde_json::Value; use std::collections::BTreeMap; @@ -351,7 +358,7 @@ mod tests { let schema_one = Schema::Decimal(DecimalSchema { precision: 10, scale: 2, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, }); assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); @@ -359,7 +366,7 @@ mod tests { let schema_two = Schema::Decimal(DecimalSchema { precision: 10, scale: 2, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, }); let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); diff --git a/avro/src/ser_schema.rs b/avro/src/ser_schema.rs index 7d0ed3e..87fbf8e 100644 --- a/avro/src/ser_schema.rs +++ b/avro/src/ser_schema.rs @@ -18,6 +18,7 @@ //! Logic for serde-compatible schema-aware serialization //! which writes directly to a `Write` stream +use crate::schema::InnerDecimalSchema; use crate::{ bigdecimal::big_decimal_as_bytes, encode::{encode_int, encode_long}, @@ -982,31 +983,30 @@ impl<'s, W: Write> SchemaAwareWriteSerializer<'s, W> { ))) } } - Schema::Decimal(decimal_schema) => match decimal_schema.inner.as_ref() { - Schema::Bytes => self.write_bytes(value), - Schema::Fixed(fixed_schema) => match fixed_schema.size.checked_sub(value.len()) { - Some(pad) => { - let pad_val = match value.len() { - 0 => 0, - _ => value[0], - }; - let padding = vec![pad_val; pad]; - self.writer - .write(padding.as_slice()) - .map_err(Details::WriteBytes)?; - self.writer - .write(value) - .map_err(|e| Details::WriteBytes(e).into()) - } - None => Err(Details::CompareFixedSizes { - size: fixed_schema.size, - n: value.len(), + Schema::Decimal(decimal_schema) => match &decimal_schema.inner { + InnerDecimalSchema::Bytes => self.write_bytes(value), + InnerDecimalSchema::Fixed(fixed_schema) => { + match fixed_schema.size.checked_sub(value.len()) { + Some(pad) => { + let pad_val = match value.len() { + 0 => 0, + _ => value[0], + }; + let padding = vec![pad_val; pad]; + self.writer + .write(padding.as_slice()) + .map_err(Details::WriteBytes)?; + self.writer + .write(value) + .map_err(|e| Details::WriteBytes(e).into()) + } + None => Err(Details::CompareFixedSizes { + size: fixed_schema.size, + n: value.len(), + } + .into()), } - .into()), - }, - unsupported => Err(create_error(format!( - "Decimal schema's inner should be Bytes or Fixed schema. Got: {unsupported}" - ))), + } }, Schema::Ref { name } => { let ref_schema = self.get_ref_schema(name)?; diff --git a/avro/src/types.rs b/avro/src/types.rs index 4448eef..ddd9781 100644 --- a/avro/src/types.rs +++ b/avro/src/types.rs @@ -16,6 +16,7 @@ // under the License. //! Logic handling the intermediate representation of Avro values. +use crate::schema::InnerDecimalSchema; use crate::{ AvroResult, Error, bigdecimal::{deserialize_big_decimal, serialize_big_decimal}, @@ -747,19 +748,18 @@ impl Value { self, precision: Precision, scale: Scale, - inner: &Schema, + inner: &InnerDecimalSchema, ) -> Result<Self, Error> { if scale > precision { return Err(Details::GetScaleAndPrecision { scale, precision }.into()); } match inner { - &Schema::Fixed(FixedSchema { size, .. }) => { + &InnerDecimalSchema::Fixed(FixedSchema { size, .. }) => { if max_prec_for_len(size)? < precision { return Err(Details::GetScaleWithFixedSize { size, precision }.into()); } } - Schema::Bytes => (), - _ => return Err(Details::ResolveDecimalSchema(inner.into()).into()), + InnerDecimalSchema::Bytes => (), }; match self { Value::Decimal(num) => { @@ -1716,7 +1716,7 @@ Field with name '"b"' is not a member of the map items"#, value.clone().resolve(&Schema::Decimal(DecimalSchema { precision: 10, scale: 4, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, }))?; assert!(value.resolve(&Schema::String).is_err()); @@ -1731,7 +1731,7 @@ Field with name '"b"' is not a member of the map items"#, .resolve(&Schema::Decimal(DecimalSchema { precision: 2, scale: 3, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, })) .is_err() ); @@ -1745,7 +1745,7 @@ Field with name '"b"' is not a member of the map items"#, .resolve(&Schema::Decimal(DecimalSchema { precision: 1, scale: 0, - inner: Box::new(Schema::Bytes), + inner: InnerDecimalSchema::Bytes, })) .is_ok() ); @@ -1760,14 +1760,14 @@ Field with name '"b"' is not a member of the map items"#, .resolve(&Schema::Decimal(DecimalSchema { precision: 10, scale: 1, - inner: Box::new(Schema::Fixed(FixedSchema { + inner: InnerDecimalSchema::Fixed(FixedSchema { name: Name::new("decimal").unwrap(), aliases: None, size: 20, doc: None, default: None, attributes: Default::default(), - })) + }) })) .is_ok() ); diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 4bf2cde..5e0c9a7 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -787,6 +787,7 @@ mod tests { use serde::{Deserialize, Serialize}; use uuid::Uuid; + use crate::schema::InnerDecimalSchema; use crate::{codec::DeflateSettings, error::Details}; use apache_avro_test_helper::TestResult; @@ -988,41 +989,41 @@ mod tests { #[test] fn decimal_fixed() -> TestResult { let size = 30; - let inner = Schema::Fixed(FixedSchema { + let fixed = FixedSchema { name: Name::new("decimal")?, aliases: None, doc: None, size, default: None, attributes: Default::default(), - }); + }; + let inner = InnerDecimalSchema::Fixed(fixed.clone()); let value = vec![0u8; size]; logical_type_test( r#"{"type": {"type": "fixed", "size": 30, "name": "decimal"}, "logicalType": "decimal", "precision": 20, "scale": 5}"#, &Schema::Decimal(DecimalSchema { precision: 20, scale: 5, - inner: Box::new(inner.clone()), + inner: inner.clone(), }), Value::Decimal(Decimal::from(value.clone())), - &inner, + &Schema::Fixed(fixed), Value::Fixed(size, value), ) } #[test] fn decimal_bytes() -> TestResult { - let inner = Schema::Bytes; let value = vec![0u8; 10]; logical_type_test( r#"{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 3}"#, &Schema::Decimal(DecimalSchema { precision: 4, scale: 3, - inner: Box::new(inner.clone()), + inner: InnerDecimalSchema::Bytes, }), Value::Decimal(Decimal::from(value.clone())), - &inner, + &Schema::Bytes, value, ) }
