This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/documentation in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 2a22649d7457dcd1be781ad01ba3c4f30a67e266 Author: Kriskras99 <[email protected]> AuthorDate: Sun Jan 18 22:40:41 2026 +0100 feat(doc): Add documentation for the `AvroSchema` derive --- avro/src/serde/derive.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++++- avro/src/serde/mod.rs | 66 +++++++++++++++++++ avro/src/serde/with.rs | 8 +-- avro_derive/src/lib.rs | 8 +++ 4 files changed, 240 insertions(+), 7 deletions(-) diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs index 5164458..d883cc9 100644 --- a/avro/src/serde/derive.rs +++ b/avro/src/serde/derive.rs @@ -22,10 +22,169 @@ use crate::schema::{ use std::borrow::Cow; use std::collections::HashMap; -/// Trait for types that serve as an Avro data model. Derive implementation available -/// through `derive` feature. Do not implement directly! -/// Implement [`AvroSchemaComponent`] to get this trait +/// Trait for types that serve as an Avro data model. +/// +/// Do not implement directly! Either derive it or implement [`AvroSchemaComponent`] to get this trait /// through a blanket implementation. +/// +/// ## Deriving `AvroSchema` +/// +/// Using the custom derive requires that you enable the `"derive"` cargo +/// feature in your `Cargo.toml`: +/// +/// ```toml +/// [dependencies] +/// apache-avro = { version = "..", features = ["derive"] } +/// ``` +/// +/// Then, you add the `#[derive(AvroSchema)]` annotation to your `struct` and +/// `enum` type definition: +/// +/// ``` +/// # use serde::{Serialize, Deserialize}; +/// # use apache_avro::AvroSchema; +/// #[derive(AvroSchema, Serialize, Deserialize)] +/// pub struct Foo { +/// bar: Vec<Bar>, +/// } +/// +/// #[derive(AvroSchema, Serialize, Deserialize)] +/// pub enum Bar { +/// Spam, +/// Maps +/// } +/// ``` +/// +/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema` +/// through the blanket implementation for `T: AvroSchemaComponent`. 
+/// +/// Every member of the `struct` and `enum` must also implement `AvroSchemaComponent`. +/// +/// ## Changing the generated schema +/// +/// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema. +/// It will also check for compatibility between the various attributes. +/// +/// ### Container attributes +/// +/// - `#[serde(rename = "name")]` +/// +// TODO: Should we check if `name` contains any dots? As that would imply a namespace +/// Set the `name` of the schema to the given string. Defaults to the name of the type. +/// +/// - `#[avro(namespace = "some.name.space")]` +/// +/// Set the `namespace` of the schema. This will be the relative namespace if the schema is included +/// in another schema. +/// +/// - `#[avro(doc = "Some documentation")]` +/// +/// Set the `doc` attribute of the schema. Defaults to the documentation of the type. +/// +/// - `#[avro(alias = "name")]` +/// +/// Set the `alias` attribute of the schema. Can be specified multiple times. +/// +/// - `#[serde(rename_all = "camelCase")]` +/// +/// Rename all the fields or variants in the schema to follow the given case convention. The possible values +/// are `"lowercase"`, `"UPPERCASE"`, `"PascalCase"`, `"camelCase"`, `"snake_case"`, `"kebab-case"`, +/// `"SCREAMING_SNAKE_CASE"`, `"SCREAMING-KEBAB-CASE"`. +/// +/// - `#[serde(transparent)]` +/// +/// Use the schema of the inner field directly. This is only allowed on structs with exactly one unskipped field. +/// +/// +/// ### Variant attributes +/// +/// - `#[serde(rename = "name")]` +/// +/// Rename the variant to the given name. +/// +/// +/// ### Field attributes +/// +/// - `#[serde(rename = "name")]` +/// +/// Rename the field name to the given name. +/// +/// - `#[avro(doc = "Some documentation")]` +/// +/// Set the `doc` attribute of the field. Defaults to the documentation of the field. +/// +/// - `#[avro(default = "null")]` +/// +/// Set the `default` attribute of the field. 
+/// +/// _Note:_ This is a JSON value, not a Rust value, as this is put in the schema itself. +/// +/// - `#[serde(alias = "name")]` +/// +/// Set the `alias` attribute of the field. Can be specified multiple times. +/// +/// - `#[serde(flatten)]` +/// +/// Flatten the content of this field into the container it is defined in. +/// +/// - `#[serde(skip)]` +/// +/// Do not include this field in the schema. +/// +/// - `#[serde(skip_serializing)]` +/// +/// When combined with `#[serde(skip_deserializing)]`, don't include this field in the schema. +/// Otherwise, it will be included in the schema and the `#[avro(default)]` attribute **must** be +/// set. That value will be used for serializing. +/// +/// - `#[serde(skip_serializing_if)]` +/// +/// Conditionally use the value of the field or the value provided by `#[avro(default)]`. The +/// `#[avro(default)]` attribute **must** be set. +/// +/// - `#[avro(with)]` and `#[serde(with = "module")]` +/// +/// Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types). +/// +/// ### Incompatible Serde attributes +/// +/// The derive macro is compatible with most Serde attributes, but it is incompatible with +/// the following attributes: +/// +/// - Container attributes +/// - `tag` +/// - `content` +/// - `untagged` +/// - `variant_identifier` +/// - `field_identifier` +/// - `remote` +/// - `rename_all(serialize = "..", deserialize = "..")` where `serialize` != `deserialize` +/// - Variant attributes +/// - `other` +/// - `untagged` +/// - Field attributes +/// - `getter` +/// +/// ## Working with foreign types +/// +/// Most foreign types won't have an [`AvroSchema`] implementation. This crate implements it only +/// for built-in types, [`serde_json::Map`] and [`uuid::Uuid`]. Notable exceptions are [`char`] and +/// [`u64`] types, as there is no equivalent for char in Avro and the largest integer type in Avro +/// is `long` (equal to an [`i64`]). 
+/// +/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]` +/// attribute can be used to get the schema for those fields. It can be used in two ways: +/// +/// 1. In combination with `#[serde(with = "path::to::module")]` +/// +/// To get the schema, it will call the function `fn get_schema_in_ctxt(&mut Names, &Namespace) -> Schema` +/// in the module provided to the Serde attribute. +/// +/// 2. By providing a function directly, `#[avro(with = some_fn)]`. +/// +/// To get the schema, it will call the function provided. It must have the signature +/// `fn(&mut Names, &Namespace) -> Schema` +/// pub trait AvroSchema { fn get_schema() -> Schema; } diff --git a/avro/src/serde/mod.rs b/avro/src/serde/mod.rs index 2a62b33..a2d8377 100644 --- a/avro/src/serde/mod.rs +++ b/avro/src/serde/mod.rs @@ -15,6 +15,72 @@ // specific language governing permissions and limitations // under the License. +//! Everything needed to use this crate with Serde. +//! +//! # Using `apache-avro` for `serde` +//! +//! Avro is a schema-based format; this means it requires a few extra steps to use compared to +//! a data format like JSON. +//! +//! ## Schemas +//! It's strongly recommended to derive the schemas for your types using the [`AvroSchema`] derive macro. +//! The macro uses the Serde attributes to generate a matching schema and checks that no attributes are +//! used that are incompatible with the Serde implementation in this crate. See [the trait documentation] for +//! details on how to change the generated schema. +//! +//! Alternatively, you can write your own schema. If you go down this path, it is recommended you start with +//! the schema derived by [`AvroSchema`] and then modify it to fit your needs. +//! +//! ### Using existing schemas +//! If you have schemas that are already being used in other parts of your software stack, generating types +//! from the schema can be very useful. 
There is a **third-party** crate [`rsgen-avro`] that implements this. +//! +//! ## Reading and writing data +//! +//! ``` +//! # use std::io::Cursor; +//! # use serde::{Serialize, Deserialize}; +//! # use apache_avro::{AvroSchema, Error, Reader, Writer, serde::{from_value, to_value}}; +//! +//! #[derive(AvroSchema, Serialize, Deserialize, PartialEq, Debug)] +//! struct Foo { +//! a: i64, +//! b: String, +//! } +//! +//! let schema = Foo::get_schema(); +//! // A writer needs the schema of the type that is going to be written +//! let mut writer = Writer::new(&schema, Vec::new())?; +//! +//! let foo = Foo { +//! a: 42, +//! b: "Hello".to_string(), +//! }; +//! +//! // There are two ways to serialize data. +//! // 1: Serialize directly to the writer: +//! writer.append_ser(&foo)?; +//! // 2: First serialize to an Avro `Value` then write that: +//! let foo_value = to_value(&foo)?; +//! writer.append(foo_value)?; +//! +//! // Always flush or consume the writer +//! let data = writer.into_inner()?; +//! +//! // The reader does not need a schema as it's included in the data +//! let reader = Reader::new(Cursor::new(data))?; +//! // The reader is an iterator +//! for result in reader { +//! let value = result?; +//! let new_foo: Foo = from_value(&value)?; +//! assert_eq!(new_foo, foo); +//! } +//! # Ok::<(), Error>(()) +//! ``` +//! +//! [`rsgen-avro`]: https://docs.rs/rsgen-avro/latest/rsgen_avro/ +//! [the trait documentation]: AvroSchema + mod de; mod derive; mod ser; diff --git a/avro/src/serde/with.rs b/avro/src/serde/with.rs index 670955b..870d064 100644 --- a/avro/src/serde/with.rs +++ b/avro/src/serde/with.rs @@ -18,14 +18,14 @@ use std::cell::Cell; thread_local! { - /// A thread local that is used to decide how to serialize Rust bytes into an Avro - /// `types::Value` of type bytes. + /// A thread local that is used to decide if Rust bytes need to be serialized to + /// [`Value::Bytes`] or [`Value::Fixed`]. 
/// /// Relies on the fact that serde's serialization process is single-threaded. pub(crate) static SER_BYTES_TYPE: Cell<BytesType> = const { Cell::new(BytesType::Bytes) }; - /// A thread local that is used to decide how to deserialize an Avro `types::Value` - /// of type bytes into Rust bytes. + /// A thread local that is used to decide if a [`Value::Bytes`] needs to be deserialized to + /// a [`Vec`] or slice. /// /// Relies on the fact that serde's deserialization process is single-threaded. pub(crate) static DE_BYTES_BORROWED: Cell<bool> = const { Cell::new(false) }; diff --git a/avro_derive/src/lib.rs b/avro_derive/src/lib.rs index 3904452..0e1631f 100644 --- a/avro_derive/src/lib.rs +++ b/avro_derive/src/lib.rs @@ -17,6 +17,14 @@ #![cfg_attr(nightly, feature(proc_macro_diagnostic))] +//! This crate provides the `AvroSchema` derive macro. +//! ```no_run +//! #[derive(AvroSchema)] +//! ``` +//! Please see the documentation of the [`AvroSchema`] trait for instructions on how to use it. +//! +//! [`AvroSchema`]: https://docs.rs/apache-avro/latest/apache_avro/schema/trait.AvroSchema.html + mod attributes; mod case;
