This is an automated email from the ASF dual-hosted git repository.

kriskras99 pushed a commit to branch feat/documentation
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 7fe545cacbbc559342fe4bc7292c2ae20987b5f3
Author: default <[email protected]>
AuthorDate: Mon Jan 26 15:34:04 2026 +0000

    docs: Move documentation away from lib.rs
---
 avro/src/documentation/mod.rs    |   5 ++
 avro/src/documentation/primer.rs |  40 ++++++++++++
 avro/src/lib.rs                  | 127 +++++++--------------------------
 avro/src/schema_compatibility.rs |  94 +++++++++++++++++++++--------
 avro/src/serde/derive.rs         |   2 +
 5 files changed, 139 insertions(+), 129 deletions(-)

diff --git a/avro/src/documentation/mod.rs b/avro/src/documentation/mod.rs
new file mode 100644
index 0000000..a7ff9b1
--- /dev/null
+++ b/avro/src/documentation/mod.rs
@@ -0,0 +1,5 @@
+//! # General documentation on Apache Avro
+//!
+//! This module does not contain any code, and is only available during `rustdoc` builds.
+
+pub mod primer;
\ No newline at end of file
diff --git a/avro/src/documentation/primer.rs b/avro/src/documentation/primer.rs
new file mode 100644
index 0000000..918ede5
--- /dev/null
+++ b/avro/src/documentation/primer.rs
@@ -0,0 +1,40 @@
+//! # A primer on Apache Avro
+//!
+//! Avro is a schema-based encoding system, like Protobuf. This means that if you have raw Avro data
+//! without a schema, you are unable to decode it. It also means that the format is very space
+//! efficient.
+//!
+//! ## Schemas
+//!
+//! Schemas are defined in JSON and look like this:
+//! ```json
+//! {
+//!     "type": "record",
+//!     "name": "example",
+//!     "fields": [
+//!         {"name": "a", "type": "long", "default": 42},
+//!         {"name": "b", "type": "string"}
+//!     ]
+//! }
+//! ```
+//! For all possible types and extra attributes, see [the schema section of the specification].
+//!
+//! [the schema section of the specification]: https://avro.apache.org/docs/++version++/specification/#schema-declaration
+//!
+//! ## File formats
+//! There are three official file formats for Avro. The data in these file formats is all encoded the same, but they differ
+//! in how the schema is included.
+//!
+//! ### [Object Container File](https://avro.apache.org/docs/++version++/specification/#object-container-files)
+//! This is the most common file format used for Avro. It includes the schema in the file, and can therefore be decoded by
+//! a reader who doesn't have the schema. It also supports including many records in one file.
+//!
+//! ### [Single Object Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding)
+//! In this file format, the schema is not included directly. It instead includes a fingerprint of the schema, which a reader
+//! can look up in a schema database or compare with the fingerprint that the reader is expecting. This file format always contains
+//! one record.
+//!
+//! ### Avro datums
+//! This is not really a file format, as it's just the raw Avro-encoded data. It does not include a schema and can therefore not be
+//! decoded without the reader knowing **exactly** which schema was used to write it.
+//!
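As context for the primer above: the Object Container File round trip it describes looks roughly like the following minimal sketch, written against the published `apache-avro` crate's `Writer`/`Reader` API (names and signatures on this branch may differ).

```rust
// Sketch against the published apache-avro crate; the API on this branch may differ.
use apache_avro::{Reader, Schema, Writer, types::Record};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(
        r#"{
            "type": "record",
            "name": "example",
            "fields": [
                {"name": "a", "type": "long", "default": 42},
                {"name": "b", "type": "string"}
            ]
        }"#,
    )?;

    // Write an Object Container File into an in-memory buffer;
    // the schema is embedded in the file header.
    let mut writer = Writer::new(&schema, Vec::new());
    let mut record = Record::new(&schema).unwrap();
    record.put("a", 27i64);
    record.put("b", "foo");
    writer.append(record)?;
    let encoded = writer.into_inner()?;

    // Because the schema travels with the file, the reader
    // needs nothing but the bytes.
    let reader = Reader::new(&encoded[..])?;
    for value in reader {
        println!("{:?}", value?);
    }
    Ok(())
}
```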
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index f60c8c2..d8dd0f9 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -34,6 +34,8 @@
 //! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
 //! data structures and a compact, fast, binary data format.
 //!
+//! If you are not familiar with the data format, please read [`documentation::primer`] first.
+//!
 //! All data in Avro is schematized, as in the following example:
 //!
 //! ```json
@@ -55,64 +57,13 @@
 //! **apache-avro** provides a way to read and write both these data representations easily and
 //! efficiently.
 //!
-//! # Installing the library
-//!
-//!
-//! Add to your `Cargo.toml`:
-//!
-//! ```toml
-//! [dependencies]
-//! apache-avro = "x.y"
-//! ```
-//!
-//! Or in case you want to leverage the **Snappy** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["snappy"]
-//! ```
-//!
-//! Or in case you want to leverage the **Zstandard** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["zstandard"]
-//! ```
-//!
-//! Or in case you want to leverage the **Bzip2** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["bzip"]
-//! ```
-//!
-//! Or in case you want to leverage the **Xz** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["xz"]
-//! ```
-//!
-//! # Upgrading to a newer minor version
-//!
-//! The library is still in beta, so there might be backward-incompatible changes between minor
-//! versions. If you have troubles upgrading, check the release notes.
-//!
-//! # Minimum supported Rust version
-//!
-//! 1.88.0
-//!
 //! # Defining a schema
 //!
-//! An Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
+//! Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
 //! **can** be used while reading and they carry the information regarding the type of data we are
 //! handling. Avro schemas are used for both schema validation and resolution of Avro data.
 //!
-//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string:
+//! Avro schemas are defined in JSON format and can just be parsed out of a raw string:
 //!
 //! ```
 //! use apache_avro::Schema;
@@ -128,14 +79,10 @@
 //!     }
 //! "#;
 //!
-//! // if the schema is not valid, this function will return an error
 //! let schema = Schema::parse_str(raw_schema).unwrap();
-//!
-//! // schemas can be printed for debugging
-//! println!("{:?}", schema);
 //! ```
 //!
-//! Additionally, a list of of definitions (which may depend on each other) can be given and all of
+//! Additionally, a list of definitions (which may depend on each other) can be given and all of
 //! them will be parsed into the corresponding schemas.
 //!
 //! ```
@@ -158,22 +105,12 @@
 //!     ]
 //! }"#;
 //!
-//! // if the schemas are not valid, this function will return an error
 //! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
-//!
-//! // schemas can be printed for debugging
-//! println!("{:?}", schemas);
 //! ```
-//! *N.B.* It is important to note that the composition of schema definitions requires schemas with names.
-//! For this reason, only schemas of type Record, Enum, and Fixed should be input into this function.
-//!
-//! The library provides also a programmatic interface to define schemas without encoding them in
-//! JSON (for advanced use), but we highly recommend the JSON interface. Please read the API
-//! reference in case you are interested.
 //!
 //! For more information about schemas and what kind of information you can encapsulate in them,
 //! please refer to the appropriate section of the
-//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration).
+//! [Avro Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration).
 //!
 //! # Writing data
 //!
@@ -280,10 +217,6 @@
 //! let encoded = writer.into_inner();
 //! ```
 //!
-//! ### Importance of the fields' order
-//!
-//! *Important*: The order of the fields in the struct must match the order of the fields in the Avro schema!
-//!
 //! ### Simple types
 //!
 //! The vast majority of the times, schemas tend to define a record as a top-level container
@@ -752,37 +685,6 @@
 //!
 //! ```
 //!
-//! ## Check schemas compatibility
-//!
-//! This library supports checking for schemas compatibility.
-//!
-//! Examples of checking for compatibility:
-//!
-//! 1. Compatible schemas
-//!
-//! Explanation: an int array schema can be read by a long array schema- an int
-//! (32bit signed integer) fits into a long (64bit signed integer)
-//!
-//! ```rust
-//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
-//!
-//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
-//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
-//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok());
-//! ```
-//!
-//! 2. Incompatible schemas (a long array schema cannot be read by an int array schema)
-//!
-//! Explanation: a long array schema cannot be read by an int array schema- a
-//! long (64bit signed integer) does not fit into an int (32bit signed integer)
-//!
-//! ```rust
-//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
-//!
-//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
-//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
-//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
-//! ```
 //! ## Custom names validators
 //!
 //! By default the library follows the rules by the
@@ -938,6 +840,21 @@
 //!     assert_eq!(records, deserialized_records);
 //! }
 //! ```
+//!
+//! # Features
+//!
+//! - `derive`: enable support for deriving [`AvroSchema`]
+//! - `snappy`: enable support for the Snappy codec
+//! - `zstandard`: enable support for the Zstandard codec
+//! - `bzip`: enable support for the Bzip2 codec
+//! - `xz`: enable support for the Xz codec
+//!
+//! # MSRV
+//!
+//! The current MSRV is 1.88.0.
+//!
+//! The MSRV may be bumped in minor releases.
+//!
 
 mod bigdecimal;
 mod bytes;
@@ -949,6 +866,8 @@ mod encode;
 mod reader;
 mod writer;
 
+#[cfg(doc)]
+pub mod documentation;
 pub mod error;
 pub mod headers;
 pub mod rabin;
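The "Avro datums" format mentioned in the primer, and still documented in `lib.rs`, can be sketched with the published crate's `to_avro_datum`/`from_avro_datum` helpers; signatures on this branch may differ.

```rust
// Sketch against the published apache-avro crate; the API on this branch may differ.
use apache_avro::{
    Schema, from_avro_datum, to_avro_datum,
    types::{Record, Value},
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(
        r#"{
            "type": "record",
            "name": "test",
            "fields": [
                {"name": "a", "type": "long", "default": 42},
                {"name": "b", "type": "string"}
            ]
        }"#,
    )?;

    let mut record = Record::new(&schema).unwrap();
    record.put("a", 27i64);
    record.put("b", "foo");

    // A datum is just the encoded bytes: no header, no schema, no fingerprint.
    let encoded = to_avro_datum(&schema, record)?;

    // Decoding therefore requires exactly the schema that was used to write.
    let decoded: Value = from_avro_datum(&schema, &mut encoded.as_slice(), None)?;
    println!("{decoded:?}");
    Ok(())
}
```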
diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs
index 74ae558..3df2a9d 100644
--- a/avro/src/schema_compatibility.rs
+++ b/avro/src/schema_compatibility.rs
@@ -15,7 +15,48 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Logic for checking schema compatibility
+//! Check if the reader's schema is compatible with the writer's schema.
+//!
+//! To allow for schema evolution, Avro supports resolving the writer's schema to the reader's schema.
+//! To check if this is possible, [`SchemaCompatibility`] can be used. For the complete rules, see
+//! [the specification](https://avro.apache.org/docs/++version++/specification/#schema-resolution).
+//!
+//! There are three levels of compatibility.
+//!
+//! 1. Fully compatible schemas (`Ok(Compatibility::Full)`)
+//!
+//! For example, an integer can always be resolved to a long:
+//!
+//! ```rust
+//! # use apache_avro::{Schema, schema_compatibility::{Compatibility, SchemaCompatibility}};
+//! let writers_schema = Schema::array(Schema::Int);
+//! let readers_schema = Schema::array(Schema::Long);
+//! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Full));
+//! ```
+//!
+//! 2. Incompatible schemas (`Err`)
+//!
+//! For example, a long can never be resolved to an int:
+//!
+//! ```rust
+//! # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
+//! let writers_schema = Schema::array(Schema::Long);
+//! let readers_schema = Schema::array(Schema::Int);
+//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
+//! ```
+//!
+//! 3. Partially compatible schemas (`Ok(Compatibility::Partial)`)
+//!
+//! For example, a union of a string and integer is only compatible with an integer if an integer was written:
+//!
+//! ```rust
+//! # use apache_avro::{Error, Schema, schema_compatibility::{Compatibility, SchemaCompatibility}};
+//! let writers_schema = Schema::union(vec![Schema::Int, Schema::String])?;
+//! let readers_schema = Schema::Int;
+//! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Partial));
+//! # Ok::<(), Error>(())
+//! ```
+//!
 use crate::{
     error::CompatibilityError,
     schema::{
@@ -31,9 +72,35 @@ use std::{
     ptr,
 };
 
+/// Check if two schemas can be resolved.
+///
+/// See [the module documentation] for more details.
+///
+/// [the module documentation]: crate::schema_compatibility
 pub struct SchemaCompatibility;
 
-/// How compatible are two schemas.
+impl SchemaCompatibility {
+    /// Recursively check if the writer's schema can be resolved to the reader's schema
+    pub fn can_read(
+        writers_schema: &Schema,
+        readers_schema: &Schema,
+    ) -> Result<Compatibility, CompatibilityError> {
+        let mut c = Checker::new();
+        c.can_read(writers_schema, readers_schema)
+    }
+
+    /// Recursively check if both schemas can be resolved to each other
+    pub fn mutual_read(
+        schema_a: &Schema,
+        schema_b: &Schema,
+    ) -> Result<Compatibility, CompatibilityError> {
+        let mut c = SchemaCompatibility::can_read(schema_a, schema_b)?;
+        c &= SchemaCompatibility::can_read(schema_b, schema_a)?;
+        Ok(c)
+    }
+}
+
+/// How compatible two schemas are.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum Compatibility {
     /// Full compatibility, resolving will always work.
@@ -375,29 +442,6 @@ impl Checker {
     }
 }
 
-impl SchemaCompatibility {
-    /// `can_read` performs a full, recursive check that a datum written using the
-    /// writers_schema can be read using the readers_schema.
-    pub fn can_read(
-        writers_schema: &Schema,
-        readers_schema: &Schema,
-    ) -> Result<Compatibility, CompatibilityError> {
-        let mut c = Checker::new();
-        c.can_read(writers_schema, readers_schema)
-    }
-
-    /// `mutual_read` performs a full, recursive check that a datum written using either
-    /// the writers_schema or the readers_schema can be read using the other schema.
-    pub fn mutual_read(
-        writers_schema: &Schema,
-        readers_schema: &Schema,
-    ) -> Result<Compatibility, CompatibilityError> {
-        let mut c = SchemaCompatibility::can_read(writers_schema, readers_schema)?;
-        c &= SchemaCompatibility::can_read(readers_schema, writers_schema)?;
-        Ok(c)
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::BTreeMap;
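A common schema-evolution check, a record gaining a field with a default, rounds out the array examples in the new module docs. A minimal sketch, assuming the `SchemaCompatibility` API shown above (on the published crate these methods return a plain `Result`, so only `is_ok()` is asserted here):

```rust
// Sketch against the SchemaCompatibility API in this diff; return types may differ elsewhere.
use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};

fn main() {
    let v1 = Schema::parse_str(
        r#"{"type": "record", "name": "example", "fields": [
            {"name": "a", "type": "long"}
        ]}"#,
    )
    .unwrap();
    // v2 adds a field with a default, so data written with v1 still resolves.
    let v2 = Schema::parse_str(
        r#"{"type": "record", "name": "example", "fields": [
            {"name": "a", "type": "long"},
            {"name": "b", "type": "string", "default": "unknown"}
        ]}"#,
    )
    .unwrap();

    // v1 data can be read with v2: the default fills the missing field.
    assert!(SchemaCompatibility::can_read(&v1, &v2).is_ok());
    // v2 data can also be read with v1 (the extra field is skipped),
    // so the schemas are mutually readable.
    assert!(SchemaCompatibility::mutual_read(&v1, &v2).is_ok());
}
```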
diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs
index b42e011..19eaac8 100644
--- a/avro/src/serde/derive.rs
+++ b/avro/src/serde/derive.rs
@@ -192,6 +192,8 @@ pub trait AvroSchema {
 /// implementation available through `derive` feature. This is what is implemented by
 /// the `derive(AvroSchema)` macro.
 ///
+/// TODO: Explain that field ordering is important
+///
 /// # Implementation guide
 ///
 /// ### Simple implementation
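The TODO above refers to a rule the removed `lib.rs` section used to state explicitly: a struct's field order must match the schema's field order, because Avro encodes record fields in schema order and the derive macro emits fields in declaration order. A minimal sketch using the published crate's `derive` feature (paths on this branch may differ):

```rust
// Requires the derive feature, e.g.:
// apache-avro = { version = "x.y", features = ["derive"] }
use apache_avro::AvroSchema;

#[derive(AvroSchema)]
struct Test {
    // Declaration order becomes schema order; reordering these fields
    // changes the wire format of every serialized `Test`.
    a: i64,
    b: String,
}

fn main() {
    println!("{:?}", Test::get_schema());
}
```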