This is an automated email from the ASF dual-hosted git repository.

kriskras99 pushed a commit to branch feat/documentation
in repository https://gitbox.apache.org/repos/asf/avro-rs.git

commit f8b9ca9a6fcfa1885e5f0658129c79451f60b524
Author: default <[email protected]>
AuthorDate: Mon Jan 26 15:34:04 2026 +0000

    docs: Move documentation away from lib.rs
---
 avro/src/documentation/mod.rs    |   5 ++
 avro/src/documentation/primer.rs |  40 ++++++++++++
 avro/src/lib.rs                  | 127 +++++++--------------------------------
 avro/src/schema_compatibility.rs |  94 +++++++++++++++++++++--------
 avro/src/serde/derive.rs         |   2 +
 5 files changed, 139 insertions(+), 129 deletions(-)

diff --git a/avro/src/documentation/mod.rs b/avro/src/documentation/mod.rs
new file mode 100644
index 0000000..a7ff9b1
--- /dev/null
+++ b/avro/src/documentation/mod.rs
@@ -0,0 +1,5 @@
+//! # General documentation on Apache Avro
+//!
+//! This module does not contain any code, and is only available during `rustdoc` builds.
+
+pub mod primer;
\ No newline at end of file
diff --git a/avro/src/documentation/primer.rs b/avro/src/documentation/primer.rs
new file mode 100644
index 0000000..918ede5
--- /dev/null
+++ b/avro/src/documentation/primer.rs
@@ -0,0 +1,40 @@
+//! # A primer on Apache Avro
+//!
+//! Avro is a schema-based encoding system, like Protobuf. This means that if you have raw Avro data
+//! without a schema, you are unable to decode it. It also means that the format is very space
+//! efficient.
+//!
+//! ## Schemas
+//! 
+//! Schemas are defined in JSON and look like this:
+//! ```json
+//! {
+//!     "type": "record",
+//!     "name": "example",
+//!     "fields": [
+//!         {"name": "a", "type": "long", "default": 42},
+//!         {"name": "b", "type": "string"}
+//!     ]
+//! }
+//! ```
+//! For all possible types and extra attributes, see [the schema section of the specification].
+//!
+//! [the schema section of the specification]: https://avro.apache.org/docs/++version++/specification/#schema-declaration
+//! 
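+//! A schema like the one above can then be parsed with `Schema::parse_str`, as covered in the
+//! crate-level docs; a minimal sketch:
+//!
+//! ```rust
+//! use apache_avro::Schema;
+//!
+//! // Parsing returns an error if the schema is not valid Avro.
+//! let schema = Schema::parse_str(r#"{"type": "array", "items": "long"}"#).unwrap();
+//! ```
+//!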
+//! ## File formats
+//! There are three official file formats for Avro. The data in these file formats is all encoded the same, but they differ
+//! in how the schema is included.
+//! 
+//! ### [Object Container File](https://avro.apache.org/docs/++version++/specification/#object-container-files)
+//! This is the most common file format used for Avro. It includes the schema in the file, and can therefore be decoded by
+//! a reader who doesn't have the schema. It also supports including many records in one file.
+//! 
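+//! A minimal sketch of the round trip, using the `Writer`/`Reader` API described in the
+//! crate-level docs (the exact calls here are based on the published `apache-avro` API):
+//!
+//! ```rust
+//! use apache_avro::{Reader, Schema, Writer, types::Record};
+//!
+//! let schema = Schema::parse_str(r#"
+//!     {"type": "record", "name": "example", "fields": [{"name": "b", "type": "string"}]}
+//! "#).unwrap();
+//!
+//! // The writer embeds the schema in the file header.
+//! let mut writer = Writer::new(&schema, Vec::new());
+//! let mut record = Record::new(writer.schema()).unwrap();
+//! record.put("b", "foo");
+//! writer.append(record).unwrap();
+//! let encoded = writer.into_inner().unwrap();
+//!
+//! // The reader recovers the schema from the header, so none is passed in.
+//! let reader = Reader::new(&encoded[..]).unwrap();
+//! for value in reader {
+//!     println!("{:?}", value.unwrap());
+//! }
+//! ```
+//!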
+//! ### [Single Object Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding)
+//! In this file format, the schema is not included directly. It instead includes a fingerprint of the schema, which a reader
+//! can look up in a schema database or compare with the fingerprint that the reader is expecting. This file format always contains
+//! one record.
+//! 
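+//! A sketch of computing a schema fingerprint for such a lookup, using the Rabin digest from
+//! the `rabin` module (the `fingerprint` method follows the published `apache-avro` API):
+//!
+//! ```rust
+//! use apache_avro::{Schema, rabin::Rabin};
+//!
+//! let schema = Schema::parse_str(r#"{"type": "array", "items": "long"}"#).unwrap();
+//! // A 64-bit Rabin fingerprint identifies this schema in a registry.
+//! let fingerprint = schema.fingerprint::<Rabin>();
+//! println!("{fingerprint}");
+//! ```
+//!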
+//! ### Avro datums
+//! This is not really a file format, as it is just the raw Avro-encoded data. It does not include a schema and therefore cannot be
+//! decoded without the reader knowing **exactly** which schema was used to write it.
+//! 
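+//! A sketch of the datum round trip with `to_avro_datum`/`from_avro_datum` (names as in the
+//! published `apache-avro` API):
+//!
+//! ```rust
+//! use apache_avro::{Schema, from_avro_datum, to_avro_datum, types::Value};
+//!
+//! let schema = Schema::parse_str(r#""long""#).unwrap();
+//! let encoded = to_avro_datum(&schema, Value::Long(42)).unwrap();
+//! // Decoding requires the exact schema the datum was written with.
+//! let decoded = from_avro_datum(&schema, &mut encoded.as_slice(), None).unwrap();
+//! assert_eq!(decoded, Value::Long(42));
+//! ```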
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index d7aece5..35c8ee5 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -34,6 +34,8 @@
 //! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich
 //! data structures and a compact, fast, binary data format.
 //!
+//! If you are not familiar with the data format, please read [`documentation::primer`] first.
+//!
 //! All data in Avro is schematized, as in the following example:
 //!
 //! ```json
@@ -55,64 +57,13 @@
 //! **apache-avro** provides a way to read and write both these data representations easily and
 //! efficiently.
 //!
-//! # Installing the library
-//!
-//!
-//! Add to your `Cargo.toml`:
-//!
-//! ```toml
-//! [dependencies]
-//! apache-avro = "x.y"
-//! ```
-//!
-//! Or in case you want to leverage the **Snappy** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["snappy"]
-//! ```
-//!
-//! Or in case you want to leverage the **Zstandard** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["zstandard"]
-//! ```
-//!
-//! Or in case you want to leverage the **Bzip2** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["bzip"]
-//! ```
-//!
-//! Or in case you want to leverage the **Xz** codec:
-//!
-//! ```toml
-//! [dependencies.apache-avro]
-//! version = "x.y"
-//! features = ["xz"]
-//! ```
-//!
-//! # Upgrading to a newer minor version
-//!
-//! The library is still in beta, so there might be backward-incompatible changes between minor
-//! versions. If you have troubles upgrading, check the release notes.
-//!
-//! # Minimum supported Rust version
-//!
-//! 1.88.0
-//!
 //! # Defining a schema
 //!
-//! An Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
+//! Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and
 //! **can** be used while reading and they carry the information regarding the type of data we are
 //! handling. Avro schemas are used for both schema validation and resolution of Avro data.
 //!
-//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string:
+//! Avro schemas are defined in JSON format and can just be parsed out of a raw string:
 //!
 //! ```
 //! use apache_avro::Schema;
@@ -128,14 +79,10 @@
 //!     }
 //! "#;
 //!
-//! // if the schema is not valid, this function will return an error
 //! let schema = Schema::parse_str(raw_schema).unwrap();
-//!
-//! // schemas can be printed for debugging
-//! println!("{:?}", schema);
 //! ```
 //!
-//! Additionally, a list of of definitions (which may depend on each other) can be given and all of
+//! Additionally, a list of definitions (which may depend on each other) can be given and all of
 //! them will be parsed into the corresponding schemas.
 //!
 //! ```
@@ -158,22 +105,12 @@
 //!         ]
 //!     }"#;
 //!
-//! // if the schemas are not valid, this function will return an error
 //! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
-//!
-//! // schemas can be printed for debugging
-//! println!("{:?}", schemas);
 //! ```
-//! *N.B.* It is important to note that the composition of schema definitions requires schemas with names.
-//! For this reason, only schemas of type Record, Enum, and Fixed should be input into this function.
-//!
-//! The library provides also a programmatic interface to define schemas without encoding them in
-//! JSON (for advanced use), but we highly recommend the JSON interface. Please read the API
-//! reference in case you are interested.
 //!
 //! For more information about schemas and what kind of information you can encapsulate in them,
 //! please refer to the appropriate section of the
-//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration).
+//! [Avro Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration).
 //!
 //! # Writing data
 //!
@@ -280,10 +217,6 @@
 //! let encoded = writer.into_inner();
 //! ```
 //!
-//! ### Importance of the fields' order
-//!
-//! *Important*: The order of the fields in the struct must match the order of the fields in the Avro schema!
-//!
 //! ### Simple types
 //!
 //! The vast majority of the times, schemas tend to define a record as a top-level container
@@ -752,37 +685,6 @@
 //!
 //! ```
 //!
-//! ## Check schemas compatibility
-//!
-//! This library supports checking for schemas compatibility.
-//!
-//! Examples of checking for compatibility:
-//!
-//! 1. Compatible schemas
-//!
-//! Explanation: an int array schema can be read by a long array schema- an int
-//! (32bit signed integer) fits into a long (64bit signed integer)
-//!
-//! ```rust
-//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
-//!
-//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
-//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
-//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok());
-//! ```
-//!
-//! 2. Incompatible schemas (a long array schema cannot be read by an int array schema)
-//!
-//! Explanation: a long array schema cannot be read by an int array schema- a
-//! long (64bit signed integer) does not fit into an int (32bit signed integer)
-//!
-//! ```rust
-//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
-//!
-//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap();
-//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap();
-//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
-//! ```
 //! ## Custom names validators
 //!
 //! By default the library follows the rules by the
@@ -938,6 +840,21 @@
 //!   assert_eq!(records, deserialized_records);
 //! }
 //! ```
+//!
+//! # Features
+//!
+//! - `derive`: enable support for deriving [`AvroSchema`]
+//! - `snappy`: enable support for the Snappy codec
+//! - `zstandard`: enable support for the Zstandard codec
+//! - `bzip`: enable support for the Bzip2 codec
+//! - `xz`: enable support for the Xz codec
+//!
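+//! For example, to enable a codec in `Cargo.toml` (with `x.y` as a version placeholder):
+//!
+//! ```toml
+//! [dependencies]
+//! apache-avro = { version = "x.y", features = ["snappy"] }
+//! ```
+//!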
+//! # MSRV
+//!
+//! The current MSRV is 1.88.0.
+//!
+//! The MSRV may be bumped in minor releases.
+//!
 
 mod bigdecimal;
 mod bytes;
@@ -949,6 +866,8 @@ mod encode;
 mod reader;
 mod writer;
 
+#[cfg(doc)]
+pub mod documentation;
 pub mod error;
 pub mod headers;
 pub mod rabin;
diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs
index 9e62978..aa0ab41 100644
--- a/avro/src/schema_compatibility.rs
+++ b/avro/src/schema_compatibility.rs
@@ -15,7 +15,48 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Logic for checking schema compatibility
+//! Check if the reader's schema is compatible with the writer's schema.
+//!
+//! To allow for schema evolution, Avro supports resolving the writer's schema to the reader's schema.
+//! To check if this is possible, [`SchemaCompatibility`] can be used. For the complete rules see
+//! [the specification](https://avro.apache.org/docs/++version++/specification/#schema-resolution).
+//! 
+//! There are three levels of compatibility.
+//!
+//! 1. Fully compatible schemas (`Ok(Compatibility::Full)`)
+//!
+//! For example, an integer can always be resolved to a long:
+//!
+//! ```rust
+//! # use apache_avro::{Schema, schema_compatibility::{Compatibility, SchemaCompatibility}};
+//! let writers_schema = Schema::array(Schema::Int);
+//! let readers_schema = Schema::array(Schema::Long);
+//! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Full));
+//! ```
+//!
+//! 2. Incompatible schemas (`Err`)
+//!
+//! For example, a long can never be resolved to an int:
+//!
+//! ```rust
+//! # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
+//! let writers_schema = Schema::array(Schema::Long);
+//! let readers_schema = Schema::array(Schema::Int);
+//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err());
+//! ```
+//! 
+//! 3. Partially compatible schemas (`Ok(Compatibility::Partial)`)
+//!
+//! For example, a union of a string and an integer can only be resolved to an integer if an integer was actually written:
+//! 
+//! ```rust
+//! # use apache_avro::{Error, Schema, schema_compatibility::{Compatibility, SchemaCompatibility}};
+//! let writers_schema = Schema::union(vec![Schema::Int, Schema::String])?;
+//! let readers_schema = Schema::Int;
+//! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Partial));
+//! # Ok::<(), Error>(())
+//! ```
+//! 
 use crate::{
     error::CompatibilityError,
     schema::{
@@ -31,9 +72,35 @@ use std::{
     ptr,
 };
 
+/// Check if two schemas can be resolved.
+/// 
+/// See [the module documentation] for more details.
+/// 
+/// [the module documentation]: crate::schema_compatibility
 pub struct SchemaCompatibility;
 
-/// How compatible are two schemas.
+impl SchemaCompatibility {
+    /// Recursively check if the writer's schema can be resolved to the reader's schema
+    pub fn can_read(
+        writers_schema: &Schema,
+        readers_schema: &Schema,
+    ) -> Result<Compatibility, CompatibilityError> {
+        let mut c = Checker::new();
+        c.can_read(writers_schema, readers_schema)
+    }
+
+    /// Recursively check if both schemas can be resolved to each other
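+    ///
+    /// For example (a sketch mirroring the module-level examples): resolving works from
+    /// int to long but not back, so a mutual check fails.
+    ///
+    /// ```rust
+    /// # use apache_avro::{Schema, schema_compatibility::SchemaCompatibility};
+    /// let a = Schema::array(Schema::Int);
+    /// let b = Schema::array(Schema::Long);
+    /// assert!(SchemaCompatibility::mutual_read(&a, &b).is_err());
+    /// ```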
+    pub fn mutual_read(
+        schema_a: &Schema,
+        schema_b: &Schema,
+    ) -> Result<Compatibility, CompatibilityError> {
+        let mut c = SchemaCompatibility::can_read(schema_a, schema_b)?;
+        c &= SchemaCompatibility::can_read(schema_b, schema_a)?;
+        Ok(c)
+    }
+}
+
+/// How compatible two schemas are.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum Compatibility {
     /// Full compatibility, resolving will always work.
@@ -375,29 +442,6 @@ impl Checker {
     }
 }
 
-impl SchemaCompatibility {
-    /// `can_read` performs a full, recursive check that a datum written using the
-    /// writers_schema can be read using the readers_schema.
-    pub fn can_read(
-        writers_schema: &Schema,
-        readers_schema: &Schema,
-    ) -> Result<Compatibility, CompatibilityError> {
-        let mut c = Checker::new();
-        c.can_read(writers_schema, readers_schema)
-    }
-
-    /// `mutual_read` performs a full, recursive check that a datum written using either
-    /// the writers_schema or the readers_schema can be read using the other schema.
-    pub fn mutual_read(
-        writers_schema: &Schema,
-        readers_schema: &Schema,
-    ) -> Result<Compatibility, CompatibilityError> {
-        let mut c = SchemaCompatibility::can_read(writers_schema, readers_schema)?;
-        c &= SchemaCompatibility::can_read(readers_schema, writers_schema)?;
-        Ok(c)
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::BTreeMap;
diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs
index d883cc9..3a1dcbb 100644
--- a/avro/src/serde/derive.rs
+++ b/avro/src/serde/derive.rs
@@ -193,6 +193,8 @@ pub trait AvroSchema {
 /// implementation available through `derive` feature. This is what is implemented by
 /// the `derive(AvroSchema)` macro.
 ///
+/// *Important*: the order of the fields in the struct must match the order of the fields in the Avro schema!
+///
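+/// For example (a sketch; the exact import path for the derive macro is an assumption here),
+/// this struct must keep `a` before `b` to match the schema's field order:
+///
+/// ```ignore
+/// use apache_avro::AvroSchema; // import path assumed
+///
+/// #[derive(AvroSchema)]
+/// struct Example {
+///     a: i64,    // first field in the Avro schema
+///     b: String, // second field in the Avro schema
+/// }
+/// ```
+///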
 /// # Implementation guide
 ///
 /// ### Simple implementation
