This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/documentation in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 4d18e9f3488f3063b0b04681a15b0e796f87404c Author: Kriskras99 <[email protected]> AuthorDate: Sat Jan 31 19:36:39 2026 +0100 docs: Further work --- avro/src/bytes.rs | 91 ++++++--------- avro/src/documentation/primer.rs | 28 +++-- avro/src/lib.rs | 239 --------------------------------------- avro/src/schema/mod.rs | 28 ++++- avro/src/schema_compatibility.rs | 6 +- avro/src/schema_equality.rs | 34 ++++++ avro/src/serde/derive.rs | 23 ++-- avro/src/serde/mod.rs | 8 ++ avro/src/serde/with.rs | 8 +- avro/src/validator.rs | 33 ++++++ avro/tests/io.rs | 2 +- avro_derive/tests/ui.rs | 2 +- 12 files changed, 176 insertions(+), 326 deletions(-) diff --git a/avro/src/bytes.rs b/avro/src/bytes.rs index 534ce47..341b01d 100644 --- a/avro/src/bytes.rs +++ b/avro/src/bytes.rs @@ -15,20 +15,21 @@ // specific language governing permissions and limitations // under the License. -//! Deprecated. See [`apache_avro::serde::*`] instead. +//! Deprecated. Use [`apache_avro::serde::*`] instead. //! //! [`apache_avro::serde::*`](crate::serde) -// Deprecated. See [`apache_avro::serde::bytes`] instead. -// -// [`apache_avro::serde::bytes`](crate::serde::bytes) +#[cfg(doc)] +use crate as apache_avro; + +/// Deprecated. Use [`apache_avro::serde::bytes`] instead. #[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::bytes` instead")] pub mod serde_avro_bytes { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; - // Deprecated. See [`apache_avro::serde::bytes::serialize`] instead. - // - // [`apache_avro::serde::bytes::serialize`](crate::serde::bytes::serialize) + /// Deprecated. Use [`apache_avro::serde::bytes::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::bytes::serialize` instead" @@ -40,9 +41,7 @@ pub mod serde_avro_bytes { crate::serde::bytes::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::bytes::deserialize`] instead. 
- // - // [`apache_avro::serde::bytes::deserialize`](crate::serde::bytes::deserialize) + /// Deprecated. Use [`apache_avro::serde::bytes::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::bytes::deserialize` instead" @@ -55,17 +54,17 @@ pub mod serde_avro_bytes { } } -// Deprecated. See [`apache_avro::serde::bytes_opt`] instead. -// -// [`apache_avro::serde::bytes_opt`](crate::serde::bytes_opt) +/// Deprecated. Use [`apache_avro::serde::bytes_opt`] instead. +/// +/// [`apache_avro::serde::bytes_opt`](crate::serde::bytes_opt) #[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::bytes_opt` instead")] pub mod serde_avro_bytes_opt { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; use std::borrow::Borrow; - // Deprecated. See [`apache_avro::serde::bytes_opt::serialize`] instead. - // - // [`apache_avro::serde::bytes_opt::serialize`](crate::serde::bytes_opt::serialize) + /// Deprecated. Use [`apache_avro::serde::bytes_opt::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::bytes_opt::serialize` instead" @@ -78,9 +77,7 @@ pub mod serde_avro_bytes_opt { crate::serde::bytes_opt::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::bytes_opt::deserialize`] instead. - // - // [`apache_avro::serde::bytes_opt::deserialize`](crate::serde::bytes_opt::deserialize) + /// Deprecated. Use [`apache_avro::serde::bytes_opt::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::bytes_opt::deserialize` instead" @@ -93,16 +90,14 @@ pub mod serde_avro_bytes_opt { } } -// Deprecated. See [`apache_avro::serde::fixed`] instead. -// -// [`apache_avro::serde::fixed`](crate::serde::fixed) +/// Deprecated. Use [`apache_avro::serde::fixed`] instead. 
#[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::fixed` instead")] pub mod serde_avro_fixed { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; - // Deprecated. See [`apache_avro::serde::fixed::serialize`] instead. - // - // [`apache_avro::serde::fixed::serialize`](crate::serde::fixed::serialize) + /// Deprecated. Use [`apache_avro::serde::fixed::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::fixed::serialize` instead" @@ -114,9 +109,7 @@ pub mod serde_avro_fixed { crate::serde::fixed::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::fixed::deserialize`] instead. - // - // [`apache_avro::serde::fixed::deserialize`](crate::serde::fixed::deserialize) + /// Deprecated. Use [`apache_avro::serde::fixed::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::fixed::deserialize` instead" @@ -129,17 +122,15 @@ pub mod serde_avro_fixed { } } -// Deprecated. See [`apache_avro::serde::fixed_opt`] instead. -// -// [`apache_avro::serde::fixed_opt`](crate::serde::fixed_opt) +/// Deprecated. Use [`apache_avro::serde::fixed_opt`] instead. #[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::fixed_opt` instead")] pub mod serde_avro_fixed_opt { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; use std::borrow::Borrow; - // Deprecated. See [`apache_avro::serde::fixed_opt::serialize`] instead. - // - // [`apache_avro::serde::fixed_opt::serialize`](crate::serde::fixed_opt::serialize) + /// Deprecated. Use [`apache_avro::serde::fixed_opt::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::fixed_opt::serialize` instead" @@ -152,9 +143,7 @@ pub mod serde_avro_fixed_opt { crate::serde::fixed_opt::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::fixed_opt::deserialize`] instead. 
- // - // [`apache_avro::serde::fixed_opt::deserialize`](crate::serde::fixed_opt::deserialize) + /// Deprecated. Use [`apache_avro::serde::fixed_opt::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::fixed_opt::deserialize` instead" @@ -167,16 +156,14 @@ pub mod serde_avro_fixed_opt { } } -// Deprecated. See [`apache_avro::serde::slice`] instead. -// -// [`apache_avro::serde::slice`](crate::serde::slice) +/// Deprecated. Use [`apache_avro::serde::slice`] instead. #[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::slice` instead")] pub mod serde_avro_slice { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; - // Deprecated. See [`apache_avro::serde::slice::serialize`] instead. - // - // [`apache_avro::serde::slice::serialize`](crate::serde::slice::serialize) + /// Deprecated. Use [`apache_avro::serde::slice::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::slice::serialize` instead" @@ -188,9 +175,7 @@ pub mod serde_avro_slice { crate::serde::slice::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::slice::deserialize`] instead. - // - // [`apache_avro::serde::slice::deserialize`](crate::serde::slice::deserialize) + /// Deprecated. Use [`apache_avro::serde::slice::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::slice::deserialize` instead" @@ -203,17 +188,15 @@ pub mod serde_avro_slice { } } -// Deprecated. See [`apache_avro::serde::slice_opt`] instead. -// -// [`apache_avro::serde::slice_opt`](crate::serde::slice_opt) +/// Deprecated. Use [`apache_avro::serde::slice_opt`] instead. #[deprecated(since = "0.22.0", note = "Use `apache_avro::serde::slice_opt` instead")] pub mod serde_avro_slice_opt { + #[cfg(doc)] + use crate as apache_avro; use serde::{Deserializer, Serializer}; use std::borrow::Borrow; - // Deprecated. See [`apache_avro::serde::slice_opt::serialize`] instead. 
- // - // [`apache_avro::serde::slice_opt::serialize`](crate::serde::slice_opt::serialize) + /// Deprecated. Use [`apache_avro::serde::slice_opt::serialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::slice_opt::serialize` instead" @@ -226,9 +209,7 @@ pub mod serde_avro_slice_opt { crate::serde::slice_opt::serialize(bytes, serializer) } - // Deprecated. See [`apache_avro::serde::slice_opt::deserialize`] instead. - // - // [`apache_avro::serde::slice_opt::deserialize`](crate::serde::slice_opt::deserialize) + /// Deprecated. Use [`apache_avro::serde::slice_opt::deserialize`] instead. #[deprecated( since = "0.22.0", note = "Use `apache_avro::serde::slice_opt::deserialize` instead" diff --git a/avro/src/documentation/primer.rs b/avro/src/documentation/primer.rs index 392df76..7dc6228 100644 --- a/avro/src/documentation/primer.rs +++ b/avro/src/documentation/primer.rs @@ -37,34 +37,42 @@ //! //! [the schema section of the specification]: https://avro.apache.org/docs/++version++/specification/#schema-declaration //! -//! ## File formats -//! There are three official file formats for Avro. The data in these file formats is all encoded the same, but they differ -//! in how the schema is included. +//! ## Data serialization and deserialization +//! There are various formats to encode and decode Avro data. Most formats use the Avro binary encoding. //! //! #### [Object Container File](https://avro.apache.org/docs/++version++/specification/#object-container-files) -//! This is the most common file format used for Avro. It includes the schema in the file, and can therefore be decoded by -//! a reader who doesn't have the schema. It also supports including many records in one file. +//! This is the most common file format used for Avro, it uses the binary encoding. It includes the +//! schema in the file, and can therefore be decoded by a reader who doesn't have the schema. It includes +//! many records in one file. //! //! 
This file format can be used via the [`Reader`](crate::Reader) and [`Writer`](crate::Writer) types. //! //! #### [Single Object Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding) -//! In this file format, the schema is not included directly. It instead includes a fingerprint of the schema, which a reader -//! can look up in a schema database or compare with the fingerprint that the reader is expecting. This file format always contains -//! one record. +//! This file format also uses the binary encoding, but the schema is not included directly. It instead +//! includes a fingerprint of the schema, which a reader can look up in a schema database or compare +//! with the fingerprint that the reader is expecting. This file format always contains one record. //! //! This file format can be used via the [`GenericSingleObjectReader`](crate::GenericSingleObjectReader), //! [`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter), [`SpecificSingleObjectReader`](crate::SpecificSingleObjectReader), //! and [`SpecificSingleObjectWriter`](crate::SpecificSingleObjectWriter) types. //! //! #### Avro datums -//! This is not really a file format, as it's just the raw Avro encoded data. It does not include a schema and can therefore not be -//! decoded without the reader knowing **exactly** which schema was used to write it. +//! This is not really a file format, as it's just the raw Avro binary data. It does not include a +//! schema and can therefore not be decoded without the reader knowing **exactly** which schema was +//! used to write it. //! //! This file format can be used via the [`to_avro_datum`](crate::to_avro_datum), [`from_avro_datum`](crate::from_avro_datum), //! [`to_avro_datum_schemata`](crate::to_avro_datum_schemata), [`from_avro_datum_schemata`](crate::from_avro_datum_schemata), //! [`from_avro_datum_reader_schemata`](crate::from_avro_datum_reader_schemata), and //! 
[`write_avro_datum_ref`](crate::write_avro_datum_ref) functions. //! +//! #### [Avro JSON](https://avro.apache.org/docs/++version++/specification/#json-encoding) +//! Not to be confused with the schema definition which is also in JSON. This is the Avro data encoded +//! in JSON. +//! +//! It can be used via the [`From<serde_json::Value> for Value`](crate::types::Value) and +//! [`TryFrom<Value> for serde_json::Value`](crate::types::Value) implementations. +//! //! ## Compression //! For records with low entropy it can be useful to compress the encoded data. Using the [#Object Container File] //! this is directly possible in Avro. Avro supports various compression codecs: diff --git a/avro/src/lib.rs b/avro/src/lib.rs index 53eff27..9b31bab 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -422,171 +422,6 @@ //! } //! ``` //! -//! `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/specification/#logical-types): -//! -//! 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/latest/num_bigint) crate -//! 1. UUID using the [`uuid`](https://docs.rs/uuid/latest/uuid) crate -//! 1. Date, Time (milli) as `i32` and Time (micro) as `i64` -//! 1. Timestamp (milli and micro) as `i64` -//! 1. Local timestamp (milli and micro) as `i64` -//! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` -//! -//! Note that the on-disk representation is identical to the underlying primitive/complex type. -//! -//! ### Read and write logical types -//! -//! ```rust -//! use apache_avro::{ -//! types::Record, types::Value, Codec, Days, Decimal, DeflateSettings, Duration, Millis, Months, Reader, Schema, -//! Writer, Error, -//! }; -//! use num_bigint::ToBigInt; -//! -//! fn main() -> Result<(), Error> { -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! { -//! "name": "decimal_fixed", -//! "type": { -//! 
"type": "fixed", -//! "size": 2, -//! "name": "decimal" -//! }, -//! "logicalType": "decimal", -//! "precision": 4, -//! "scale": 2 -//! }, -//! { -//! "name": "decimal_var", -//! "type": "bytes", -//! "logicalType": "decimal", -//! "precision": 10, -//! "scale": 3 -//! }, -//! { -//! "name": "uuid", -//! "type": "string", -//! "logicalType": "uuid" -//! }, -//! { -//! "name": "date", -//! "type": "int", -//! "logicalType": "date" -//! }, -//! { -//! "name": "time_millis", -//! "type": "int", -//! "logicalType": "time-millis" -//! }, -//! { -//! "name": "time_micros", -//! "type": "long", -//! "logicalType": "time-micros" -//! }, -//! { -//! "name": "timestamp_millis", -//! "type": "long", -//! "logicalType": "timestamp-millis" -//! }, -//! { -//! "name": "timestamp_micros", -//! "type": "long", -//! "logicalType": "timestamp-micros" -//! }, -//! { -//! "name": "local_timestamp_millis", -//! "type": "long", -//! "logicalType": "local-timestamp-millis" -//! }, -//! { -//! "name": "local_timestamp_micros", -//! "type": "long", -//! "logicalType": "local-timestamp-micros" -//! }, -//! { -//! "name": "duration", -//! "type": { -//! "type": "fixed", -//! "size": 12, -//! "name": "duration" -//! }, -//! "logicalType": "duration" -//! } -//! ] -//! } -//! "#; -//! -//! let schema = Schema::parse_str(raw_schema)?; -//! -//! println!("{:?}", schema); -//! -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate(DeflateSettings::default())).unwrap() ; -//! -//! let mut record = Record::new(writer.schema()).unwrap(); -//! record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be())); -//! record.put("decimal_var", Decimal::from(((-32442).to_bigint().unwrap()).to_signed_bytes_be())); -//! record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap()); -//! record.put("date", Value::Date(1)); -//! record.put("time_millis", Value::TimeMillis(2)); -//! 
record.put("time_micros", Value::TimeMicros(3)); -//! record.put("timestamp_millis", Value::TimestampMillis(4)); -//! record.put("timestamp_micros", Value::TimestampMicros(5)); -//! record.put("timestamp_nanos", Value::TimestampNanos(6)); -//! record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); -//! record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); -//! record.put("local_timestamp_nanos", Value::LocalTimestampMicros(6)); -//! record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); -//! -//! writer.append_value(record)?; -//! -//! let input = writer.into_inner()?; -//! let reader = Reader::with_schema(&schema, &input[..])?; -//! -//! for record in reader { -//! println!("{:?}", record?); -//! } -//! Ok(()) -//! } -//! ``` -//! -//! ## Calculate Avro schema fingerprint -//! -//! This library supports calculating the following fingerprints: -//! -//! - SHA-256 -//! - MD5 -//! - Rabin -//! -//! An example of fingerprinting for the supported fingerprints: -//! -//! ```rust -//! use apache_avro::rabin::Rabin; -//! use apache_avro::{Schema, Error}; -//! use md5::Md5; -//! use sha2::Sha256; -//! -//! fn main() -> Result<(), Error> { -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! "#; -//! let schema = Schema::parse_str(raw_schema)?; -//! println!("{}", schema.fingerprint::<Sha256>()); -//! println!("{}", schema.fingerprint::<Md5>()); -//! println!("{}", schema.fingerprint::<Rabin>()); -//! Ok(()) -//! } -//! ``` -//! //! ## Ill-formed data //! //! In order to ease decoding, the Binary Encoding specification of Avro data @@ -616,80 +451,6 @@ //! //! ``` //! -//! ## Custom names validators -//! -//! By default the library follows the rules by the -//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)! -//! -//! 
Some of the other Apache Avro language SDKs are not that strict and allow more -//! characters in names. For interoperability with those SDKs, the library provides -//! a way to customize the names validation. -//! -//! ```rust -//! use apache_avro::AvroResult; -//! use apache_avro::schema::Namespace; -//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; -//! -//! struct MyCustomValidator; -//! -//! impl SchemaNameValidator for MyCustomValidator { -//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { -//! todo!() -//! } -//! } -//! -//! // don't parse any schema before registering the custom validator(s) ! -//! -//! set_schema_name_validator(Box::new(MyCustomValidator)); -//! -//! // ... use the library -//! ``` -//! -//! Similar logic could be applied to the schema namespace, enum symbols and field names validation. -//! -//! **Note**: the library allows to set a validator only once per the application lifetime! -//! If the application parses schemas before setting a validator, the default validator will be -//! registered and used! -//! -//! ## Custom schema equality comparators -//! -//! The library provides two implementations of schema equality comparators: -//! 1. `SpecificationEq` - a comparator that serializes the schemas to their -//! canonical forms (i.e. JSON) and compares them as strings. -//! See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas) -//! for more information! -//! 2. `StructFieldEq` - a comparator that compares the schemas structurally. -//! It is faster than the `SpecificationEq` because it returns `false` as soon as a difference -//! is found and does not require encoding the schema to JSON. -//! It is the default comparator. -//! -//! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the -//! `set_schemata_equality_comparator` function: -//! -//! ```rust -//! 
use apache_avro::{AvroResult, Schema}; -//! use apache_avro::schema::Namespace; -//! use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator}; -//! -//! #[derive(Debug)] -//! struct MyCustomSchemataEq; -//! -//! impl SchemataEq for MyCustomSchemataEq { -//! fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { -//! todo!() -//! } -//! } -//! -//! // don't parse any schema before registering the custom comparator ! -//! -//! set_schemata_equality_comparator(Box::new(MyCustomSchemataEq)); -//! -//! // ... use the library -//! ``` -//! **Note**: the library allows to set a comparator only once per the application lifetime! -//! If the application parses schemas before setting a comparator, the default comparator will be -//! registered and used! -//! //! ## Deserializing Avro Byte Arrays //! //! If using the Serde way to deserialize avro files, there are sometimes special derive statements diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 38ebac2..e1c6c7d 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -406,7 +406,33 @@ impl Schema { Ok(this.canonical_form()) } - /// Generate [fingerprint] of Schema's [Parsing Canonical Form]. + /// Generate the [fingerprint] of the schema's [Parsing Canonical Form]. 
+ /// + /// # Example + /// ``` + /// use apache_avro::rabin::Rabin; + /// use apache_avro::{Schema, Error}; + /// use md5::Md5; + /// use sha2::Sha256; + /// + /// fn main() -> Result<(), Error> { + /// let raw_schema = r#" + /// { + /// "type": "record", + /// "name": "test", + /// "fields": [ + /// {"name": "a", "type": "long", "default": 42}, + /// {"name": "b", "type": "string"} + /// ] + /// } + /// "#; + /// let schema = Schema::parse_str(raw_schema)?; + /// println!("{}", schema.fingerprint::<Sha256>()); + /// println!("{}", schema.fingerprint::<Md5>()); + /// println!("{}", schema.fingerprint::<Rabin>()); + /// Ok(()) + /// } + /// ``` /// /// [Parsing Canonical Form]: /// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index d7994f2..44a0511 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -27,7 +27,7 @@ //! //! For example, an integer can always be resolved to a long: //! -//! ```rust +//! ``` //! # use apache_avro::{Schema, schema_compatibility::{Compatibility, SchemaCompatibility}}; //! let writers_schema = Schema::array(Schema::Int); //! let readers_schema = Schema::array(Schema::Long); @@ -38,7 +38,7 @@ //! //! For example, a long can never be resolved to an int: //! -//! ```rust +//! ``` //! # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; //! let writers_schema = Schema::array(Schema::Long); //! let readers_schema = Schema::array(Schema::Int); @@ -49,7 +49,7 @@ //! //! For example, a union of a string and integer is only compatible with an integer if an integer was written: //! -//! ```rust +//! ``` //! # use apache_avro::{Error, Schema, schema_compatibility::{Compatibility, SchemaCompatibility}}; //! let writers_schema = Schema::union(vec![Schema::Int, Schema::String])?; //! 
let readers_schema = Schema::Int; diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs index ecc793b..d8f1558 100644 --- a/avro/src/schema_equality.rs +++ b/avro/src/schema_equality.rs @@ -15,6 +15,40 @@ // specific language governing permissions and limitations // under the License. +//! # Custom schema equality comparators +//! +//! The library provides two implementations of schema equality comparators: +//! 1. `StructFieldEq` (default) - compares the schemas structurally, may slightly deviate from the specification. +//! 2. `SpecificationEq` - compares the schemas by serializing them to their canonical form and comparing +//! the resulting JSON. +//! +//! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the +//! `set_schemata_equality_comparator` function: +//! +//! ``` +//! use apache_avro::{AvroResult, Schema}; +//! use apache_avro::schema::Namespace; +//! use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator}; +//! +//! #[derive(Debug)] +//! struct MyCustomSchemataEq; +//! +//! impl SchemataEq for MyCustomSchemataEq { +//! fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { +//! todo!() +//! } +//! } +//! +//! // don't parse any schema before registering the custom comparator! +//! +//! set_schemata_equality_comparator(Box::new(MyCustomSchemataEq)); +//! +//! // ... use the library +//! ``` +//! **Note**: the library allows to set a comparator only once per the application lifetime! +//! If the application parses schemas before setting a comparator, the default comparator will be +//! registered and used! 
+ use crate::schema::{InnerDecimalSchema, UuidSchema}; use crate::{ Schema, diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index ebd054e..0ef46f0 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -177,14 +177,16 @@ use std::collections::HashMap; /// /// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut Names, &Namespace) -> Schema` /// and `fn get_record_fields_in_ctxt(&mut Names, &Namespace) -> Schema` in the module provided -/// to the Serde attribute. +/// to the Serde attribute. See [`AvroSchemaComponent`] for details on how to implement those +/// functions. /// /// 2. By providing a function directly, `#[avro(with = some_fn)]`. /// /// To get the schema, it will call the function provided. It must have the signature /// `fn(&mut Names, &Namespace) -> Schema`. When this is used for a `transparent` struct, the -/// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used -/// which is implemented with a lot of backtracking and cloning. +/// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used. +/// This is only recommended for primitive types, as the default implementation cannot be efficiently +/// implemented for complex types. /// pub trait AvroSchema { /// Construct the full schema that represents this type. 
@@ -214,7 +216,7 @@ pub trait AvroSchema { /// Schema::Int /// } /// -/// fn get_record_fields_in_ctxt(_: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(_: usize, _: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> { /// None // A Schema::Int is not a Schema::Record so there are no fields to return /// } ///} @@ -235,8 +237,8 @@ pub trait AvroSchema { /// T::get_schema_in_ctxt(named_schemas, enclosing_namespace) /// } /// -/// fn get_record_fields_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { -/// T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace) +/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// T::get_record_fields_in_ctxt(first_field_position, named_schemas, enclosing_namespace) /// } ///} /// ``` @@ -253,7 +255,7 @@ pub trait AvroSchema { /// with backtracking and a lot of cloning. 
/// - Even if your schema is not a record, still implement the function and just return `None` /// -/// ```rust +/// ``` /// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Names, Namespace, RecordField, RecordSchema}}; /// # use serde::{Serialize, Deserialize}; /// # use std::time::Duration; @@ -275,7 +277,7 @@ pub trait AvroSchema { /// named_schemas.insert(name.clone(), Schema::Ref { name: name.clone() }); /// let schema = Schema::Record(RecordSchema::builder() /// .name(name.clone()) -/// .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!")) +/// .fields(Self::get_record_fields_in_ctxt(0, named_schemas, enclosing_namespace).expect("Impossible!")) /// .build() /// ); /// named_schemas.insert(name, schema.clone()); @@ -283,19 +285,22 @@ pub trait AvroSchema { /// } /// } /// -/// fn get_record_fields_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// fn get_record_fields_in_ctxt(first_field_position: usize, named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { /// Some(vec![ /// RecordField::builder() /// .name("one") /// .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .position(first_field_position) /// .build(), /// RecordField::builder() /// .name("two") /// .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .position(first_field_position+1) /// .build(), /// RecordField::builder() /// .name("three") /// .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .position(first_field_position+2) /// .build(), /// ]) /// } diff --git a/avro/src/serde/mod.rs b/avro/src/serde/mod.rs index 02cada6..22c43a7 100644 --- a/avro/src/serde/mod.rs +++ b/avro/src/serde/mod.rs @@ -45,8 +45,13 @@ //! struct Foo { //! a: i64, //! b: String, +//! // Otherwise it will be serialized as an array of integers +//! #[avro(with)] +//! 
#[serde(with = "apache_avro::serde::bytes")] +//! c: Vec<u8>, //! } //! +//! // Creating this schema is expensive, reuse it as much as possible //! let schema = Foo::get_schema(); //! // A writer needs the schema of the type that is going to be written //! let mut writer = Writer::new(&schema, Vec::new())?; @@ -54,10 +59,13 @@ //! let foo = Foo { //! a: 42, //! b: "Hello".to_string(), +//! c: b"Data".to_vec() //! }; //! //! // Serialize as many items as you want. //! writer.append_ser(&foo)?; +//! writer.append_ser(&foo)?; +//! writer.append_ser(&foo)?; //! //! // Always flush //! writer.flush(); diff --git a/avro/src/serde/with.rs b/avro/src/serde/with.rs index 870d064..08913bc 100644 --- a/avro/src/serde/with.rs +++ b/avro/src/serde/with.rs @@ -76,7 +76,6 @@ impl Drop for BorrowedGuard { /// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes { /// #[avro(with)] @@ -136,7 +135,6 @@ pub mod bytes { /// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes { /// #[avro(with)] @@ -200,7 +198,6 @@ pub mod bytes_opt { /// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes { /// #[avro(with)] @@ -275,7 +272,6 @@ pub mod fixed { /// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes { /// #[avro(with)] @@ -349,10 +345,9 @@ pub mod fixed_opt { /// Use [`apache_avro::serde::slice_opt`] for optional bytes/fixed borrowed values. 
/// /// See usage with below example: -/// ```rust +/// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes<'a> { /// #[avro(with)] @@ -415,7 +410,6 @@ pub mod slice { /// ``` /// # use apache_avro::AvroSchema; /// # use serde::{Deserialize, Serialize}; -/// /// #[derive(AvroSchema, Serialize, Deserialize)] /// struct StructWithBytes<'a> { /// #[avro(with)] diff --git a/avro/src/validator.rs b/avro/src/validator.rs index e8406a2..4623302 100644 --- a/avro/src/validator.rs +++ b/avro/src/validator.rs @@ -15,6 +15,39 @@ // specific language governing permissions and limitations // under the License. +//! # Custom name validation +//! +//! By default, the library follows the rules specified in the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). +//! +//! Some of the other Apache Avro language SDKs are more flexible in their name validation. For +//! interoperability with those SDKs, the library provides a way to customize the name validation. +//! +//! ``` +//! use apache_avro::AvroResult; +//! use apache_avro::schema::Namespace; +//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; +//! +//! struct MyCustomValidator; +//! +//! impl SchemaNameValidator for MyCustomValidator { +//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { +//! todo!() +//! } +//! } +//! +//! // don't parse any schema before registering the custom validator(s)! +//! +//! set_schema_name_validator(Box::new(MyCustomValidator)); +//! +//! // ... use the library +//! ``` +//! +//! Similar logic could be applied to the schema namespace, enum symbols and field names validation. +//! +//! **Note**: the library allows to set a validator only once per the application lifetime! +//! If the application parses schemas before setting a validator, the default validator will be +//! registered and used! 
+ use crate::{AvroResult, error::Details, schema::Namespace}; use log::debug; use regex_lite::Regex; diff --git a/avro/tests/io.rs b/avro/tests/io.rs index 5284426..dba7eef 100644 --- a/avro/tests/io.rs +++ b/avro/tests/io.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Port of https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py +//! Port of <https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py> use apache_avro::{Error, Schema, error::Details, from_avro_datum, to_avro_datum, types::Value}; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; diff --git a/avro_derive/tests/ui.rs b/avro_derive/tests/ui.rs index 5337257..9b9fd7d 100644 --- a/avro_derive/tests/ui.rs +++ b/avro_derive/tests/ui.rs @@ -17,7 +17,7 @@ /// These tests only run on nightly as the output can change per compiler version. /// -/// See https://github.com/dtolnay/trybuild/issues/84 +/// See <https://github.com/dtolnay/trybuild/issues/84> #[rustversion::attr(not(nightly), ignore)] #[test] fn ui() {
