This is an automated email from the ASF dual-hosted git repository.
kriskras99 pushed a commit to branch feat/documentation
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/feat/documentation by this
push:
new e4a43c7 docs: Continue working on moving documentation away from
lib.rs and enable and fix some Clippy lints for documentation
e4a43c7 is described below
commit e4a43c7fd44f0d1e9f1596c8c162549b94b446b5
Author: Kriskras99 <[email protected]>
AuthorDate: Thu Jan 29 22:12:39 2026 +0100
docs: Continue working on moving documentation away from lib.rs and enable
and fix some Clippy lints for documentation
---
Cargo.toml | 5 ++
avro/src/documentation/mod.rs | 2 +-
avro/src/documentation/primer.rs | 61 +++++++++++++---
avro/src/duration.rs | 4 +-
avro/src/error.rs | 4 +-
avro/src/headers.rs | 25 ++++---
avro/src/lib.rs | 86 +++-------------------
avro/src/rabin.rs | 12 ++--
avro/src/reader.rs | 12 ++--
avro/src/schema/mod.rs | 46 +++++++-----
avro/src/schema/name.rs | 15 ++--
avro/src/schema/parser.rs | 42 +++++------
avro/src/schema/union.rs | 2 +-
avro/src/schema_compatibility.rs | 12 ++--
avro/src/schema_equality.rs | 18 +++--
avro/src/serde/derive.rs | 152 +++++++++++++++++++++++++++------------
avro/src/serde/mod.rs | 11 ++-
avro/src/serde/ser_schema.rs | 41 ++++++-----
avro/src/types.rs | 8 +--
avro/src/util.rs | 19 ++---
avro/src/validator.rs | 63 +++++++++-------
avro/src/writer.rs | 19 +++--
avro_derive/build.rs | 2 +-
avro_derive/src/case.rs | 16 ++---
avro_test_helper/src/lib.rs | 3 +-
25 files changed, 366 insertions(+), 314 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index f5a1231..06b1dca 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,4 +54,9 @@ opt-level = "s"
[workspace.lints]
# By setting the priority to -1, this lint group can be overridden for
specific lints
clippy.all = { level = "warn", priority = -1 }
+clippy.too_long_first_doc_paragraph = "warn"
+clippy.doc_markdown = "warn"
+# TODO: Needs more work
+#clippy.missing_errors_doc = "warn"
+#clippy.missing_panics_doc = "warn"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(nightly)'] }
diff --git a/avro/src/documentation/mod.rs b/avro/src/documentation/mod.rs
index a7ff9b1..b138cf5 100644
--- a/avro/src/documentation/mod.rs
+++ b/avro/src/documentation/mod.rs
@@ -2,4 +2,4 @@
//!
//! This module does not contain any code, and is only available during
`rustdoc` builds.
-pub mod primer;
\ No newline at end of file
+pub mod primer;
diff --git a/avro/src/documentation/primer.rs b/avro/src/documentation/primer.rs
index 918ede5..392df76 100644
--- a/avro/src/documentation/primer.rs
+++ b/avro/src/documentation/primer.rs
@@ -5,7 +5,7 @@
//! efficient.
//!
//! ## Schemas
-//!
+//!
//! Schemas are defined in JSON and look like this:
//! ```json
//! {
@@ -18,23 +18,62 @@
//! }
//! ```
//! For all possible types and extra attributes, see [the schema section of
the specification].
-//!
+//!
+//! Schemas can depend on each other. For example, the schema defined above
+//! can be reused by name, or a schema can even reference itself:
+//! ```json
+//! {
+//! "type": "record",
+//! "name": "references",
+//! "fields": [
+//! {"name": "a", "type": "example"},
+//! {"name": "b", "type": "bytes"},
+//! {"name": "recursive", "type": ["null", "references"]}
+//! ]
+//! }
+//! ```
+//!
+//! Schemas are represented using the [`Schema`](crate::Schema) type.
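+//! A schema can be parsed from a JSON string with
+//! [`Schema::parse_str`](crate::Schema::parse_str), and multiple interdependent
+//! schemas can be parsed together with
+//! [`Schema::parse_list`](crate::Schema::parse_list). A minimal sketch:
+//! ```
+//! use apache_avro::Schema;
+//!
+//! let raw_schema = r#"{"type": "record", "name": "example", "fields": [{"name": "a", "type": "long"}]}"#;
+//! let schema = Schema::parse_str(raw_schema).unwrap();
+//! ```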
+//!
//! [the schema section of the specification]:
https://avro.apache.org/docs/++version++/specification/#schema-declaration
-//!
+//!
//! ## File formats
//! There are three official file formats for Avro. The data in these file
formats is all encoded the same, but they differ
//! in how the schema is included.
-//!
-//! ### [Object Container
File](https://avro.apache.org/docs/++version++/specification/#object-container-files)
+//!
+//! #### [Object Container
File](https://avro.apache.org/docs/++version++/specification/#object-container-files)
//! This is the most common file format used for Avro. It includes the schema
in the file, and can therefore be decoded by
//! a reader who doesn't have the schema. It also supports including many
records in one file.
-//!
-//! ### [Single Object
Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding)
+//!
+//! This file format can be used via the [`Reader`](crate::Reader) and
[`Writer`](crate::Writer) types.
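+//!
+//! A minimal sketch of a write and read round trip (error handling condensed):
+//! ```
+//! # use apache_avro::{Reader, Schema, Writer, types::Record};
+//! # fn main() -> Result<(), apache_avro::Error> {
+//! let schema = Schema::parse_str(r#"{"type": "record", "name": "example", "fields": [{"name": "a", "type": "long"}]}"#)?;
+//!
+//! let mut writer = Writer::new(&schema, Vec::new());
+//! let mut record = Record::new(&schema).unwrap();
+//! record.put("a", 42i64);
+//! writer.append(record)?;
+//! let encoded = writer.into_inner()?;
+//!
+//! // The schema is embedded in the file, so the reader does not need it
+//! let reader = Reader::new(&encoded[..])?;
+//! for value in reader {
+//!     println!("{:?}", value?);
+//! }
+//! # Ok(())
+//! # }
+//! ```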
+//!
+//! #### [Single Object
Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding)
//! In this file format, the schema is not included directly. It instead
includes a fingerprint of the schema, which a reader
-//! can lookup in a schema database or compare with the fingerprint that the
reader is expecting. This file format always contains
+//! can look up in a schema database or compare with the fingerprint that the
reader is expecting. This file format always contains
//! one record.
-//!
-//! ### Avro datums
+//!
+//! This file format can be used via the
[`GenericSingleObjectReader`](crate::GenericSingleObjectReader),
+//! [`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter),
[`SpecificSingleObjectReader`](crate::SpecificSingleObjectReader),
+//! and [`SpecificSingleObjectWriter`](crate::SpecificSingleObjectWriter)
types.
+//!
+//! #### Avro datums
//! This is not really a file format, as it's just the raw Avro encoded data.
It does not include a schema and can therefore not be
//! decoded without the reader knowing **exactly** which schema was used to
write it.
-//!
+//!
+//! This file format can be used via the
[`to_avro_datum`](crate::to_avro_datum),
[`from_avro_datum`](crate::from_avro_datum),
+//! [`to_avro_datum_schemata`](crate::to_avro_datum_schemata),
[`from_avro_datum_schemata`](crate::from_avro_datum_schemata),
+//!
[`from_avro_datum_reader_schemata`](crate::from_avro_datum_reader_schemata), and
+//! [`write_avro_datum_ref`](crate::write_avro_datum_ref) functions.
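+//!
+//! A minimal sketch of a datum round trip:
+//! ```
+//! use apache_avro::{Schema, from_avro_datum, to_avro_datum, types::Value};
+//! use std::io::Cursor;
+//!
+//! let schema = Schema::parse_str(r#""long""#).unwrap();
+//! let encoded = to_avro_datum(&schema, Value::Long(42)).unwrap();
+//! let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap();
+//! assert_eq!(decoded, Value::Long(42));
+//! ```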
+//!
+//! ## Compression
+//! For records with low entropy it can be useful to compress the encoded data.
+//! When using the [Object Container File](#object-container-file) format, this
+//! is directly supported by Avro. Avro supports various compression codecs:
+//!
+//! - deflate
+//! - bzip2
+//! - Snappy
+//! - XZ
+//! - Zstandard
+//!
+//! All readers are required to implement the `deflate` codec, but most
+//! implementations support most of the codecs.
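+//!
+//! In this crate, the codec is selected when constructing a
+//! [`Writer`](crate::Writer). A minimal sketch (the exact `Codec` variants and
+//! their feature gates may differ between versions, so treat this as
+//! illustrative):
+//! ```ignore
+//! use apache_avro::{Codec, Schema, Writer};
+//!
+//! let schema = Schema::Long;
+//! let writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+//! ```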
diff --git a/avro/src/duration.rs b/avro/src/duration.rs
index cf24bc0..eecfca1 100644
--- a/avro/src/duration.rs
+++ b/avro/src/duration.rs
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-/// A struct representing duration that hides the details of endianness and
conversion between
-/// platform-native u32 and byte arrays.
use serde::{Deserialize, Serialize, de};
+/// A struct representing duration that hides the details of endianness and
conversion between
+/// platform-native u32 and byte arrays.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Duration {
months: Months,
diff --git a/avro/src/error.rs b/avro/src/error.rs
index cbb00c0..4bd6a25 100644
--- a/avro/src/error.rs
+++ b/avro/src/error.rs
@@ -554,11 +554,11 @@ pub enum Details {
#[error("Failed to convert JSON to string: {0}")]
ConvertJsonToString(#[source] serde_json::Error),
- /// Error while converting float to json value
+ /// Error while converting float to JSON value
#[error("failed to convert avro float to json: {0}")]
ConvertF64ToJson(f64),
- /// Error while resolving Schema::Ref
+ /// Error while resolving [`Schema::Ref`]
#[error("Unresolved schema reference: {0}")]
SchemaResolutionError(Name),
diff --git a/avro/src/headers.rs b/avro/src/headers.rs
index dce134f..8a97f7b 100644
--- a/avro/src/headers.rs
+++ b/avro/src/headers.rs
@@ -20,17 +20,18 @@ use uuid::Uuid;
use crate::{AvroResult, Schema, rabin::Rabin, schema::SchemaFingerprint};
-/// This trait represents that an object is able to construct an Avro message
header. It is
-/// implemented for some known header types already. If you need a header type
that is not already
-/// included here, then you can create your own struct and implement this
trait.
+/// This trait represents that an object is able to construct an Avro message
header.
+///
+/// It is implemented for some known header types already. If you need a
header type that is not
+/// already included here, then you can create your own struct and implement
this trait.
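+///
+/// A minimal sketch of a custom implementation (the prefix bytes are made up
+/// for illustration):
+/// ```
+/// use apache_avro::headers::HeaderBuilder;
+///
+/// struct MagicPrefixHeader;
+///
+/// impl HeaderBuilder for MagicPrefixHeader {
+///     fn build_header(&self) -> Vec<u8> {
+///         // Written in front of every encoded message
+///         vec![0xC0, 0x01]
+///     }
+/// }
+/// ```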
pub trait HeaderBuilder {
fn build_header(&self) -> Vec<u8>;
}
-/// HeaderBuilder based on the Rabin schema fingerprint
+/// [`HeaderBuilder`] based on the Rabin schema fingerprint.
///
/// This is the default and will be used automatically by the `new` impls in
-/// [crate::reader::GenericSingleObjectReader] and
[crate::writer::GenericSingleObjectWriter].
+/// [`GenericSingleObjectReader`](crate::GenericSingleObjectReader) and
[`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter).
pub struct RabinFingerprintHeader {
fingerprint: SchemaFingerprint,
}
@@ -53,16 +54,15 @@ impl HeaderBuilder for RabinFingerprintHeader {
}
}
-/// HeaderBuilder based on
-/// [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html)
schema UUID
+/// [`HeaderBuilder`] for
[Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html).
///
-/// See the function docs for usage details
+/// See the function docs for usage details.
pub struct GlueSchemaUuidHeader {
schema_uuid: Uuid,
}
impl GlueSchemaUuidHeader {
- /// Create an instance of the struct from a Glue Schema UUID
+ /// Create an instance of the struct from a Glue Schema UUID.
///
/// Code for writing messages will most likely want to use this. You will
need to determine
/// via other means the correct Glue schema UUID and use it with this
method to be able to
@@ -72,12 +72,11 @@ impl GlueSchemaUuidHeader {
}
/// The minimum length of a Glue header.
- /// 2 bytes for the special prefix (3, 0) plus
- /// 16 bytes for the Uuid
+ ///
+ /// 2 bytes for the special prefix (3, 0) plus 16 bytes for the Uuid.
const GLUE_HEADER_LENGTH: usize = 18;
- /// Create an instance of the struct based on parsing the UUID out of the
header of a raw
- /// message
+ /// Create an instance of the struct based on parsing the UUID out of the header of a raw message.
///
/// Code for reading messages will most likely want to use this. Once you
receive the raw bytes
/// of a message, use this function to build the struct from it. That
struct can then be used
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index d8dd0f9..3fd312c 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -32,86 +32,17 @@
//! Please check our [documentation](https://docs.rs/apache-avro) for
examples, tutorials and API reference.
//!
//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system
which provides rich
-//! data structures and a compact, fast, binary data format.
+//! data structures and a compact, fast, binary data format. If you are not
familiar with the data
+//! format, please read [`documentation::primer`] first.
//!
-//! If you are not familiar with the data format, please read
[`documentation::primer`] first.
+//! There are two ways of working with Avro data in this crate:
//!
-//! All data in Avro is schematized, as in the following example:
-//!
-//! ```json
-//! {
-//! "type": "record",
-//! "name": "test",
-//! "fields": [
-//! {"name": "a", "type": "long", "default": 42},
-//! {"name": "b", "type": "string"}
-//! ]
-//! }
-//! ```
-//!
-//! There are basically two ways of handling Avro data in Rust:
-//!
-//! * **as Avro-specialized data types** based on an Avro schema;
-//! * **as generic Rust serde-compatible types** implementing/deriving
`Serialize` and `Deserialize`;
+//! * Via the generic [`Value`](types::Value) type, which allows for
dynamically dealing with data at runtime.
+//! * Via types implementing [`Serialize`](::serde::Serialize),
[`Deserialize`](::serde::Deserialize), and [`AvroSchema`].
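+//!
+//! A minimal sketch of both approaches (the second assumes the `derive`
+//! feature is enabled):
+//! ```
+//! use apache_avro::{AvroSchema, types::Value};
+//! use serde::{Deserialize, Serialize};
+//!
+//! // Dynamic: assemble values at runtime
+//! let record = Value::Record(vec![("a".to_string(), Value::Long(42))]);
+//!
+//! // Static: derive the schema from a Rust type
+//! #[derive(AvroSchema, Serialize, Deserialize)]
+//! struct Example {
+//!     a: i64,
+//! }
+//! ```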
//!
//! **apache-avro** provides a way to read and write both these data
representations easily and
//! efficiently.
//!
-//! # Defining a schema
-//!
-//! Avro data cannot exist without an Avro schema. Schemas **must** be used
while writing and
-//! **can** be used while reading and they carry the information regarding the
type of data we are
-//! handling. Avro schemas are used for both schema validation and resolution
of Avro data.
-//!
-//! Avro schemas are defined in JSON format and can just be parsed out of a
raw string:
-//!
-//! ```
-//! use apache_avro::Schema;
-//!
-//! let raw_schema = r#"
-//! {
-//! "type": "record",
-//! "name": "test",
-//! "fields": [
-//! {"name": "a", "type": "long", "default": 42},
-//! {"name": "b", "type": "string"}
-//! ]
-//! }
-//! "#;
-//!
-//! let schema = Schema::parse_str(raw_schema).unwrap();
-//! ```
-//!
-//! Additionally, a list of definitions (which may depend on each other) can
be given and all of
-//! them will be parsed into the corresponding schemas.
-//!
-//! ```
-//! use apache_avro::Schema;
-//!
-//! let raw_schema_1 = r#"{
-//! "name": "A",
-//! "type": "record",
-//! "fields": [
-//! {"name": "field_one", "type": "float"}
-//! ]
-//! }"#;
-//!
-//! // This definition depends on the definition of A above
-//! let raw_schema_2 = r#"{
-//! "name": "B",
-//! "type": "record",
-//! "fields": [
-//! {"name": "field_one", "type": "A"}
-//! ]
-//! }"#;
-//!
-//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap();
-//! ```
-//!
-//! For more information about schemas and what kind of information you can
encapsulate in them,
-//! please refer to the appropriate section of the
-//! [Avro
Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration).
-//!
//! # Writing data
//!
//! Once we have defined a schema, we are ready to serialize data in Avro,
validating them against
@@ -724,14 +655,13 @@
//!
//! The library provides two implementations of schema equality comparators:
//! 1. `SpecificationEq` - a comparator that serializes the schemas to their
-//! canonical forms (i.e. JSON) and compares them as strings. It is the
only implementation
-//! until apache_avro 0.16.0.
+//! canonical forms (i.e. JSON) and compares them as strings.
//! See the [Avro
specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas)
//! for more information!
//! 2. `StructFieldEq` - a comparator that compares the schemas structurally.
//! It is faster than the `SpecificationEq` because it returns `false` as
soon as a difference
-//! is found and is recommended for use!
-//! It is the default comparator since apache_avro 0.17.0.
+//! is found and does not require encoding the schema to JSON.
+//! It is the default comparator.
//!
//! To use a custom comparator, you need to implement the `SchemataEq` trait
and set it using the
//! `set_schemata_equality_comparator` function:
diff --git a/avro/src/rabin.rs b/avro/src/rabin.rs
index 2587a6d..497f476 100644
--- a/avro/src/rabin.rs
+++ b/avro/src/rabin.rs
@@ -43,12 +43,12 @@ fn fp_table() -> &'static [i64; 256] {
})
}
-/// Implementation of the Rabin fingerprint algorithm using the Digest trait
as described in
[schema_fingerprints](https://avro.apache.org/docs/current/specification/#schema-fingerprints).
+/// Implementation of the Rabin fingerprint algorithm using the
[`Digest`](digest::Digest) trait as described in [schema fingerprints].
///
/// The digest is returned as the 8-byte little-endian encoding of the Rabin
hash.
-/// This is what is used for avro [single object
encoding](https://avro.apache.org/docs/current/specification/#single-object-encoding)
+/// This is what is used for Avro [single object encoding].
///
-/// ```rust
+/// ```
/// use apache_avro::rabin::Rabin;
/// use digest::Digest;
/// use hex_literal::hex;
@@ -65,9 +65,9 @@ fn fp_table() -> &'static [i64; 256] {
/// assert_eq!(result[..], hex!("60335ba6d0415528"));
/// ```
///
-/// To convert the digest to the commonly used 64-bit integer value, you can
use the i64::from_le_bytes() function
+/// To convert the digest to the commonly used 64-bit integer value, you can
use the [`i64::from_le_bytes()`] function
///
-/// ```rust
+/// ```
/// # use apache_avro::rabin::Rabin;
/// # use digest::Digest;
/// # use hex_literal::hex;
@@ -84,6 +84,8 @@ fn fp_table() -> &'static [i64; 256] {
///
/// assert_eq!(i, 2906301498937520992)
/// ```
+/// [single object encoding]: https://avro.apache.org/docs/current/specification/#single-object-encoding
+/// [schema fingerprints]: https://avro.apache.org/docs/current/specification/#schema-fingerprints
#[derive(Clone)]
pub struct Rabin {
result: i64,
diff --git a/avro/src/reader.rs b/avro/src/reader.rs
index 967edb5..d101ea1 100644
--- a/avro/src/reader.rs
+++ b/avro/src/reader.rs
@@ -460,12 +460,12 @@ pub fn from_avro_datum<R: Read>(
}
}
-/// Decode a `Value` encoded in Avro format given the provided `Schema` and
anything implementing `io::Read`
-/// to read from.
+/// Decode a `Value` from raw Avro data.
+///
/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it
will use the provided
/// schemata to resolve any dependencies.
///
-/// In case a reader `Schema` is provided, schema resolution will also be
performed.
+/// When a reader `Schema` is provided, schema resolution will also be
performed.
pub fn from_avro_datum_schemata<R: Read>(
writer_schema: &Schema,
writer_schemata: Vec<&Schema>,
@@ -481,12 +481,12 @@ pub fn from_avro_datum_schemata<R: Read>(
)
}
-/// Decode a `Value` encoded in Avro format given the provided `Schema` and
anything implementing `io::Read`
-/// to read from.
+/// Decode a `Value` from raw Avro data.
+///
/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it
will use the provided
/// schemata to resolve any dependencies.
///
-/// In case a reader `Schema` is provided, schema resolution will also be
performed.
+/// When a reader `Schema` is provided, schema resolution will also be
performed.
pub fn from_avro_datum_reader_schemata<R: Read>(
writer_schema: &Schema,
writer_schemata: Vec<&Schema>,
diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs
index 3e6e4b1..9a0cb0b 100644
--- a/avro/src/schema/mod.rs
+++ b/avro/src/schema/mod.rs
@@ -52,7 +52,8 @@ use parser::Parser;
/// Represents documentation for complex Avro schemas.
pub type Documentation = Option<String>;
-/// Represents an Avro schema fingerprint
+/// Represents an Avro schema fingerprint.
+///
/// More information about Avro schema fingerprints can be found in the
/// [Avro Schema Fingerprint
documentation](https://avro.apache.org/docs/current/specification/#schema-fingerprints)
pub struct SchemaFingerprint {
@@ -75,7 +76,7 @@ impl fmt::Display for SchemaFingerprint {
/// Represents any valid Avro schema
/// More information about Avro schemas can be found in the
-/// [Avro
Specification](https://avro.apache.org/docs/current/specification/#schema-declaration)
+/// [Avro
Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration)
#[derive(Clone, Debug, EnumDiscriminants, Display)]
#[strum_discriminants(name(SchemaKind), derive(Hash, Ord, PartialOrd))]
pub enum Schema {
@@ -92,17 +93,20 @@ pub enum Schema {
/// A `double` Avro schema.
Double,
/// A `bytes` Avro schema.
+ ///
/// `Bytes` represents a sequence of 8-bit unsigned bytes.
Bytes,
/// A `string` Avro schema.
+ ///
/// `String` represents a unicode character sequence.
String,
- /// A `array` Avro schema. Avro arrays are required to have the same type
for each element.
- /// This variant holds the `Schema` for the array element type.
+ /// An `array` Avro schema.
+ ///
+ /// All items will have the same schema.
Array(ArraySchema),
/// A `map` Avro schema.
- /// `Map` holds a pointer to the `Schema` of its values, which must all be
the same schema.
- /// `Map` keys are assumed to be `string`.
+ ///
+ /// Keys are always a `Schema::String` and all values will have the same
schema.
Map(MapSchema),
/// A `union` Avro schema.
Union(UnionSchema),
@@ -112,22 +116,27 @@ pub enum Schema {
Enum(EnumSchema),
/// A `fixed` Avro schema.
Fixed(FixedSchema),
- /// Logical type which represents `Decimal` values. The underlying type is
serialized and
- /// deserialized as `Schema::Bytes` or `Schema::Fixed`.
+ /// Logical type which represents `Decimal` values.
+ ///
+ /// The underlying type is serialized and deserialized as `Schema::Bytes`
or `Schema::Fixed`.
Decimal(DecimalSchema),
/// Logical type which represents `Decimal` values without predefined
scale.
+ ///
/// The underlying type is serialized and deserialized as `Schema::Bytes`
BigDecimal,
/// A universally unique identifier, annotating a string, bytes or fixed.
Uuid(UuidSchema),
/// Logical type which represents the number of days since the unix epoch.
+ ///
/// Serialization format is `Schema::Int`.
Date,
- /// The time of day in number of milliseconds after midnight with no
reference any calendar,
- /// time zone or date in particular.
+ /// The time of day in number of milliseconds after midnight.
+ ///
+ /// This type has no reference to any calendar, time zone or date in
particular.
TimeMillis,
- /// The time of day in number of microseconds after midnight with no
reference any calendar,
- /// time zone or date in particular.
+ /// The time of day in number of microseconds after midnight.
+ ///
+ /// This type has no reference to any calendar, time zone or date in
particular.
TimeMicros,
/// An instant in time represented as the number of milliseconds after the
UNIX epoch.
TimestampMillis,
@@ -829,7 +838,7 @@ impl Schema {
}
}
- /// Returns a Schema::Map with the given types.
+ /// Returns a `Schema::Map` with the given types.
pub fn map(types: Schema) -> Self {
Schema::Map(MapSchema {
types: Box::new(types),
@@ -837,7 +846,7 @@ impl Schema {
})
}
- /// Returns a Schema::Map with the given types and custom attributes.
+ /// Returns a `Schema::Map` with the given types and custom attributes.
pub fn map_with_attributes(types: Schema, attributes: BTreeMap<String,
Value>) -> Self {
Schema::Map(MapSchema {
types: Box::new(types),
@@ -845,7 +854,7 @@ impl Schema {
})
}
- /// Returns a Schema::Array with the given items.
+ /// Returns a `Schema::Array` with the given items.
pub fn array(items: Schema) -> Self {
Schema::Array(ArraySchema {
items: Box::new(items),
@@ -853,7 +862,7 @@ impl Schema {
})
}
- /// Returns a Schema::Array with the given items and custom attributes.
+ /// Returns a `Schema::Array` with the given items and custom attributes.
pub fn array_with_attributes(items: Schema, attributes: BTreeMap<String,
Value>) -> Self {
Schema::Array(ArraySchema {
items: Box::new(items),
@@ -1124,8 +1133,9 @@ impl Serialize for Schema {
}
}
-/// Parses a **valid** avro schema into the Parsing Canonical Form.
-///
https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas
+/// Parses a valid Avro schema into [the Parsing Canonical Form].
+///
+/// [the Parsing Canonical Form]: https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas
fn parsing_canonical_form(schema: &Value, defined_names: &mut HashSet<String>)
-> String {
match schema {
Value::Object(map) => pcf_map(map, defined_names),
diff --git a/avro/src/schema/name.rs b/avro/src/schema/name.rs
index 624443f..57c1cae 100644
--- a/avro/src/schema/name.rs
+++ b/avro/src/schema/name.rs
@@ -121,18 +121,19 @@ impl Name {
}
}
- /// Return the fully qualified name needed for indexing or searching for
the schema within a schema/schema env context. Puts the enclosing namespace
into the name's namespace for clarity in schema/schema env parsing
- /// ```ignore
- /// use apache_avro::schema::Name;
+ /// Construct the fully qualified name.
///
+ /// ```
+ /// # use apache_avro::{Error, schema::Name};
/// assert_eq!(
- ///
Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())),
- /// Name::new("some_namespace.some_name")?
+ ///
Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())),
+ /// Name::new("some_namespace.some_name")?
/// );
/// assert_eq!(
- ///
Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())),
- /// Name::new("some_namespace.some_name")?
+ ///
Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())),
+ /// Name::new("some_namespace.some_name")?
/// );
+ /// # Ok::<(), Error>(())
/// ```
pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) ->
Name {
Name {
diff --git a/avro/src/schema/parser.rs b/avro/src/schema/parser.rs
index ced79bc..2f3f6a1 100644
--- a/avro/src/schema/parser.rs
+++ b/avro/src/schema/parser.rs
@@ -33,12 +33,10 @@ use std::collections::{BTreeMap, HashMap, HashSet};
#[derive(Default)]
pub(crate) struct Parser {
input_schemas: HashMap<Name, Value>,
- /// A map of name -> Schema::Ref
/// Used to resolve cyclic references, i.e. when a
/// field's type is a reference to its record's type
resolving_schemas: Names,
input_order: Vec<Name>,
- /// A map of name -> fully parsed Schema
/// Used to avoid parsing the same schema twice
parsed_schemas: Names,
}
@@ -67,8 +65,9 @@ impl Parser {
self.parse(&value, &None)
}
- /// Create an array of `Schema`'s from an iterator of JSON Avro schemas.
It is allowed that
- /// the schemas have cross-dependencies; these will be resolved during
parsing.
+ /// Create an array of `Schema`s from an iterator of JSON Avro schemas.
+ ///
+ /// It is allowed that the schemas have cross-dependencies; these will be
resolved during parsing.
pub(super) fn parse_list(&mut self) -> AvroResult<Vec<Schema>> {
self.parse_input_schemas()?;
@@ -83,7 +82,7 @@ impl Parser {
Ok(parsed_schemas)
}
- /// Convert the input schemas to parsed_schemas
+ /// Convert the input schemas to `parsed_schemas`.
pub(super) fn parse_input_schemas(&mut self) -> Result<(), Error> {
while !self.input_schemas.is_empty() {
let next_name = self
@@ -103,8 +102,7 @@ impl Parser {
Ok(())
}
- /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro
- /// schema.
+ /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro
schema.
pub(super) fn parse(
&mut self,
value: &Value,
@@ -120,9 +118,7 @@ impl Parser {
}
}
- /// Parse a `serde_json::Value` representing an Avro type whose Schema is
known into a
- /// `Schema`. A Schema for a `serde_json::Value` is known if it is
primitive or has
- /// been parsed previously by the parsed and stored in its map of
parsed_schemas.
+ /// Parse a string as a primitive type or a reference to a previously parsed schema in `parsed_schemas`.
fn parse_known_schema(
&mut self,
name: &str,
@@ -142,9 +138,10 @@ impl Parser {
}
/// Given a name, tries to retrieve the parsed schema from
`parsed_schemas`.
+ ///
/// If a parsed schema is not found, it checks if a currently resolving
/// schema with that name exists.
- /// If a resolving schema is not found, it checks if a json with that name
exists
+ /// If a resolving schema is not found, it checks if a JSON with that name
exists
/// in `input_schemas` and then parses it (removing it from
`input_schemas`)
/// and adds the parsed schema to `parsed_schemas`.
///
@@ -240,11 +237,10 @@ impl Parser {
}
}
- /// Parse a `serde_json::Value` representing a complex Avro type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing a complex Avro type into a
`Schema`.
///
/// Avro supports "recursive" definition of types.
- /// e.g: {"type": {"type": "string"}}
+ /// e.g. `{"type": {"type": "string"}}`
pub(super) fn parse_complex(
&mut self,
complex: &Map<String, Value>,
@@ -539,8 +535,7 @@ impl Parser {
}
}
- /// Parse a `serde_json::Value` representing a Avro record type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro record type into a `Schema`.
fn parse_record(
&mut self,
complex: &Map<String, Value>,
@@ -619,8 +614,7 @@ impl Parser {
custom_attributes
}
- /// Parse a `serde_json::Value` representing a Avro enum type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro enum type into a `Schema`.
fn parse_enum(
&mut self,
complex: &Map<String, Value>,
@@ -697,8 +691,7 @@ impl Parser {
Ok(schema)
}
- /// Parse a `serde_json::Value` representing a Avro array type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro array type into a `Schema`.
fn parse_array(
&mut self,
complex: &Map<String, Value>,
@@ -716,8 +709,7 @@ impl Parser {
})
}
- /// Parse a `serde_json::Value` representing a Avro map type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro map type into a `Schema`.
fn parse_map(
&mut self,
complex: &Map<String, Value>,
@@ -735,8 +727,7 @@ impl Parser {
})
}
- /// Parse a `serde_json::Value` representing a Avro union type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro union type into a `Schema`.
fn parse_union(
&mut self,
items: &[Value],
@@ -764,8 +755,7 @@ impl Parser {
})
}
- /// Parse a `serde_json::Value` representing a Avro fixed type into a
- /// `Schema`.
+ /// Parse a `serde_json::Value` representing an Avro fixed type into a `Schema`.
fn parse_fixed(
&mut self,
complex: &Map<String, Value>,
diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs
index 5bf631a..7510a13 100644
--- a/avro/src/schema/union.rs
+++ b/avro/src/schema/union.rs
@@ -36,7 +36,7 @@ pub struct UnionSchema {
}
impl UnionSchema {
- /// Creates a new UnionSchema from a vector of schemas.
+ /// Creates a new `UnionSchema` from a vector of schemas.
///
/// # Errors
/// Will return an error if `schemas` has duplicate unnamed schemas or if
`schemas`
diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs
index 3df2a9d..f176a04 100644
--- a/avro/src/schema_compatibility.rs
+++ b/avro/src/schema_compatibility.rs
@@ -20,7 +20,7 @@
//! To allow for schema evolution, Avro supports resolving the writer's schema
to the reader's schema.
//! To check if this is possible, [`SchemaCompatibility`] can be used. For the
complete rules see
//! [the
specification](https://avro.apache.org/docs/++version++/specification/#schema-resolution).
-//!
+//!
//! There are three levels of compatibility.
//!
//! 1. Fully compatible schemas (`Ok(Compatibility::Full)`)
@@ -44,11 +44,11 @@
//! let readers_schema = Schema::array(Schema::Int);
//! assert!(SchemaCompatibility::can_read(&writers_schema,
&readers_schema).is_err());
//! ```
-//!
+//!
//! 3. Partially compatible schemas (`Ok(Compatibility::Partial)`)
//!
//! For example, a union of a string and integer is only compatible with an
integer if an integer was written:
-//!
+//!
//! ```rust
//! # use apache_avro::{Error, Schema, schema_compatibility::{Compatibility,
SchemaCompatibility}};
//! let writers_schema = Schema::union(vec![Schema::Int, Schema::String])?;
@@ -56,7 +56,7 @@
//! assert_eq!(SchemaCompatibility::can_read(&writers_schema,
&readers_schema), Ok(Compatibility::Partial));
//! # Ok::<(), Error>(())
//! ```
-//!
+//!
use crate::{
error::CompatibilityError,
schema::{
@@ -73,9 +73,9 @@ use std::{
};
/// Check if two schemas can be resolved.
-///
+///
/// See [the module documentation] for more details.
-///
+///
/// [the module documentation]: crate::schema_compatibility
pub struct SchemaCompatibility;
diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs
index cd1c2cd..ecc793b 100644
--- a/avro/src/schema_equality.rs
+++ b/avro/src/schema_equality.rs
@@ -27,15 +27,16 @@ use log::debug;
use std::{fmt::Debug, sync::OnceLock};
/// A trait that compares two schemata for equality.
-/// To register a custom one use [set_schemata_equality_comparator].
+///
+/// To register a custom one use [`set_schemata_equality_comparator`].
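+///
+/// A minimal sketch of a custom comparator (assuming the registration function
+/// takes a boxed trait object, like the other `set_*` registrars in this crate):
+/// ```ignore
+/// use apache_avro::{Schema, schema_equality::{SchemataEq, set_schemata_equality_comparator}};
+///
+/// #[derive(Debug)]
+/// struct AlwaysEqual;
+///
+/// impl SchemataEq for AlwaysEqual {
+///     fn compare(&self, _one: &Schema, _two: &Schema) -> bool {
+///         true
+///     }
+/// }
+///
+/// set_schemata_equality_comparator(Box::new(AlwaysEqual))
+///     .expect("a comparator was already registered");
+/// ```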
pub trait SchemataEq: Debug + Send + Sync {
/// Compares two schemata for equality.
fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool;
}
-/// Compares two schemas according to the Avro specification by using
-/// their canonical forms.
-/// See
<https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas>
+/// Compares two schemas according to the Avro specification by using [their
canonical forms].
+///
+/// [their canonical forms]: https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas
#[derive(Debug)]
pub struct SpecificationEq;
impl SchemataEq for SpecificationEq {
@@ -44,12 +45,15 @@ impl SchemataEq for SpecificationEq {
}
}
-/// Compares two schemas for equality field by field, using only the fields
that
-/// are used to construct their canonical forms.
-/// See
<https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas>
+/// Compares [the canonical forms] of two schemas for equality field by field.
+///
+/// This means that attributes like `aliases`, `doc`, `default` and
`logicalType` are ignored.
+///
+/// [the canonical forms]: https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas
#[derive(Debug)]
pub struct StructFieldEq {
/// Whether to include custom attributes in the comparison.
+ ///
/// The custom attributes are not used to construct the canonical form of
the schema!
pub include_attributes: bool,
}
diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs
index 19eaac8..341f0c7 100644
--- a/avro/src/serde/derive.rs
+++ b/avro/src/serde/derive.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::Schema;
-use crate::schema::{FixedSchema, Name, Names, Namespace, UnionSchema,
UuidSchema};
+use crate::schema::{FixedSchema, Name, Names, Namespace, RecordField,
UnionSchema, UuidSchema};
use serde_json::Map;
use std::borrow::Cow;
use std::collections::HashMap;
@@ -57,14 +57,14 @@ use std::collections::HashMap;
/// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
/// through the blanket implementation for `T: AvroSchemaComponent`.
///
-/// Every member of the `struct` and `enum` must also implement
`AvroSchemaComponent`.
+/// When deriving `struct`s, every member must also implement
`AvroSchemaComponent`.
///
/// ## Changing the generated schema
///
/// The derive macro will read both the `avro` and `serde` attributes to
modify the generated schema.
/// It will also check for compatibility between the various attributes.
///
-/// ### Container attributes
+/// #### Container attributes
///
/// - `#[serde(rename = "name")]`
///
@@ -95,14 +95,14 @@ use std::collections::HashMap;
/// Use the schema of the inner field directly. Is only allowed on structs with only one unskipped field.
///
///
-/// ### Variant attributes
+/// #### Variant attributes
///
/// - `#[serde(rename = "name")]`
///
/// Rename the variant to the given name.
///
///
-/// ### Field attributes
+/// #### Field attributes
///
/// - `#[serde(rename = "name")]`
///
@@ -145,7 +145,7 @@ use std::collections::HashMap;
///
/// Override the schema used for this field. See [Working with foreign
types](#working-with-foreign-types).
///
-/// ### Incompatible Serde attributes
+/// #### Incompatible Serde attributes
///
/// The derive macro is compatible with most Serde attributes, but it is
incompatible with
/// the following attributes:
@@ -167,83 +167,147 @@ use std::collections::HashMap;
/// ## Working with foreign types
///
/// Most foreign types won't have a [`AvroSchema`] implementation. This crate
implements it only
-/// for built-in types, [`serde_json::Map`] and [`uuid::Uuid`]. Notable
exceptions are [`char`] and
-/// [`u64`] types, as there is no equivalent for char in Avro and the largest
integer type in Avro
-/// is `long` (equal to an [`i64`]).
+/// for built-in types and [`uuid::Uuid`].
///
/// To still be able to derive schemas for fields of foreign types, the `#[avro(with)]`
/// attribute can be used to get the schema for those fields. It can be used
in two ways:
///
/// 1. In combination with `#[serde(with = "path::to::module")]`
///
-/// To get the schema, it will call the function `fn
get_schema_in_ctxt(&mut Names, &Namespace) -> Schema`
-/// in the module provided to the Serde attribute.
+/// To get the schema, it will call the functions `fn
get_schema_in_ctxt(&mut Names, &Namespace) -> Schema`
+/// and `fn get_record_fields_in_ctxt(&mut Names, &Namespace) -> Option<Vec<RecordField>>` in the module provided
+/// to the Serde attribute.
///
/// 2. By providing a function directly, `#[avro(with = some_fn)]`.
///
/// To get the schema, it will call the function provided. It must have the
signature
-/// `fn(&mut Names, &Namespace) -> Schema`
+/// `fn(&mut Names, &Namespace) -> Schema`. When this is used for a
`transparent` struct, the
+/// default implementation of
[`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used,
+/// which is implemented with a lot of backtracking and cloning.
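+///
+/// A minimal sketch of the second form (`ForeignId` and `other_crate` are
+/// hypothetical stand-ins for a type from another crate):
+/// ```ignore
+/// use apache_avro::{AvroSchema, Schema, schema::{Names, Namespace}};
+/// use serde::{Deserialize, Serialize};
+///
+/// // `ForeignId` has no `AvroSchemaComponent` impl; it serializes as a string,
+/// // so describe it as a `Schema::String`.
+/// fn foreign_id_schema(_: &mut Names, _: &Namespace) -> Schema {
+///     Schema::String
+/// }
+///
+/// #[derive(AvroSchema, Serialize, Deserialize)]
+/// struct Request {
+///     #[avro(with = foreign_id_schema)]
+///     id: other_crate::ForeignId,
+/// }
+/// ```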
///
pub trait AvroSchema {
+ /// Construct the full schema that represents this type.
+ ///
+ /// The returned schema is fully independent and only contains `Schema::Ref`s
+ /// to named types defined earlier in the schema.
fn get_schema() -> Schema;
}
-/// Trait for types that serve as fully defined components inside an Avro data
model. Derive
-/// implementation available through `derive` feature. This is what is
implemented by
-/// the `derive(AvroSchema)` macro.
+/// Trait for types that serve as fully defined components inside an Avro data
model.
///
-/// TODO: Explain that field ordering is important
+/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when
the `derive` feature is enabled.
///
/// # Implementation guide
///
-/// ### Simple implementation
-/// To construct a non named simple schema, it is possible to ignore the input
argument making the
-/// general form implementation look like
-/// ```ignore
-/// impl AvroSchemaComponent for AType {
+/// ### Implementation for returning primitive types
+/// When the schema you want to return is a primitive type (a type without a
name), the function
+/// arguments can be ignored.
+///
+/// For example, you have a custom integer type:
+/// ```
+/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names,
Namespace, RecordField}};
+/// // Make sure to implement `Serialize` and `Deserialize` to use the right
serialization methods
+/// pub struct U24([u8; 3]);
+/// impl AvroSchemaComponent for U24 {
/// fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema {
-/// Schema::?
-/// }
+/// Schema::Int
+/// }
+///
+/// fn get_record_fields_in_ctxt(_: &mut Names, _: &Namespace) ->
Option<Vec<RecordField>> {
+/// None // A Schema::Int is not a Schema::Record so there are no
fields to return
+/// }
///}
/// ```
///
/// ### Passthrough implementation
///
-/// To construct a schema for a Type that acts as in "inner" type, such as for
smart pointers, simply
-/// pass through the arguments to the inner type
-/// ```ignore
-/// impl AvroSchemaComponent for PassthroughType {
+/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
+/// pass through the arguments to the inner type:
+/// ```
+/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names,
Namespace, RecordField}};
+/// # use serde::{Serialize, Deserialize};
+/// #[derive(Serialize, Deserialize)]
+/// #[serde(transparent)] // This attribute is important for all passthrough
implementations!
+/// pub struct Transparent<T>(T);
+/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
/// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace:
&Namespace) -> Schema {
-/// InnerType::get_schema_in_ctxt(named_schemas, enclosing_namespace)
-/// }
+/// T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
+/// }
+///
+/// fn get_record_fields_in_ctxt(named_schemas: &mut Names,
enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
+/// T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
+/// }
///}
/// ```
///
-/// ### Complex implementation
+/// ### Implementation for complex types
+/// When the schema you want to return is a complex type (a type with a name),
special care has to
+/// be taken to avoid duplicate type definitions and getting the correct
namespace.
///
-/// To implement this for Named schema there is a general form needed to avoid
creating invalid
-/// schemas or infinite loops.
-/// ```ignore
-/// impl AvroSchemaComponent for ComplexType {
+/// Things to keep in mind:
+/// - If the fully qualified name already exists, return a [`Schema::Ref`]
+/// - Use the `AvroSchemaComponent` implementations to get the schemas for
the subtypes
+/// - The ordering of fields in the schema **must** match the ordering in Serde
+/// - Implement `get_record_fields_in_ctxt`, as the default implementation relies on
+/// backtracking and a lot of cloning.
+/// - Even if your schema is not a record, still implement the function
and just return `None`
+///
+/// ```rust
+/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name,
Names, Namespace, RecordField, RecordSchema}};
+/// # use serde::{Serialize, Deserialize};
+/// # use std::time::Duration;
+/// pub struct Foo {
+/// one: String,
+/// two: i32,
+/// three: Option<Duration>
+/// }
+///
+/// impl AvroSchemaComponent for Foo {
/// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace:
&Namespace) -> Schema {
/// // Create the fully qualified name for your type given the
enclosing namespace
-/// let name = apache_avro::schema::Name::new("MyName")
-/// .expect("Unable to parse schema name")
-/// .fully_qualified_name(enclosing_namespace);
-/// let enclosing_namespace = &name.namespace;
-/// // Check, if your name is already defined, and if so, return a ref
to that name
+/// let name =
Name::new("Foo").unwrap().fully_qualified_name(enclosing_namespace);
/// if named_schemas.contains_key(&name) {
-/// apache_avro::schema::Schema::Ref{name: name.clone()}
+/// Schema::Ref { name }
/// } else {
-/// named_schemas.insert(name.clone(),
apache_avro::schema::Schema::Ref{name: name.clone()});
-/// // YOUR SCHEMA DEFINITION HERE with the name equivalent to
"MyName".
-/// // For non-simple sub types delegate to their implementation
of AvroSchemaComponent
+/// let enclosing_namespace = &name.namespace;
+/// // This is needed because otherwise recursive types will
recurse forever and cause a stack overflow
+/// named_schemas.insert(name.clone(), Schema::Ref { name:
name.clone() });
+/// let schema = Schema::Record(RecordSchema::builder()
+/// .name(name.clone())
+/// .fields(Self::get_record_fields_in_ctxt(named_schemas,
enclosing_namespace).expect("Impossible!"))
+/// .build()
+/// );
+/// named_schemas.insert(name, schema.clone());
+/// schema
/// }
-/// }
+/// }
+///
+/// fn get_record_fields_in_ctxt(named_schemas: &mut Names,
enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
+/// Some(vec![
+/// RecordField::builder()
+/// .name("one")
+/// .schema(String::get_schema_in_ctxt(named_schemas,
enclosing_namespace))
+/// .build(),
+/// RecordField::builder()
+/// .name("two")
+/// .schema(i32::get_schema_in_ctxt(named_schemas,
enclosing_namespace))
+/// .build(),
+/// RecordField::builder()
+/// .name("three")
+///
.schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas,
enclosing_namespace))
+/// .build(),
+/// ])
+/// }
///}
/// ```
pub trait AvroSchemaComponent {
fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace:
&Namespace) -> Schema;
+ fn get_record_fields_in_ctxt(
+ named_schemas: &mut Names,
+ enclosing_namespace: &Namespace,
+ ) -> Option<Vec<RecordField>> {
+ None
+ }
}
impl<T> AvroSchema for T
diff --git a/avro/src/serde/mod.rs b/avro/src/serde/mod.rs
index 5129e6c..840351a 100644
--- a/avro/src/serde/mod.rs
+++ b/avro/src/serde/mod.rs
@@ -41,7 +41,6 @@
//! # use std::io::Cursor;
//! # use serde::{Serialize, Deserialize};
//! # use apache_avro::{AvroSchema, Error, Reader, Writer, serde::{from_value,
to_value}};
-//!
//! #[derive(AvroSchema, Serialize, Deserialize, PartialEq, Debug)]
//! struct Foo {
//! a: i64,
@@ -57,14 +56,12 @@
//! b: "Hello".to_string(),
//! };
//!
-//! // There are two ways to serialize data.
-//! // 1: Serialize directly to the writer:
+//! // Serialize as many items as you want.
//! writer.append_ser(&foo)?;
-//! // 2: First serialize to an Avro `Value` then write that:
-//! let foo_value = to_value(&foo)?;
-//! writer.append(foo_value)?;
//!
-//! // Always flush or consume the writer
+//! // Always flush
+//! writer.flush()?;
+//! // Or consume the writer
//! let data = writer.into_inner()?;
//!
//! // The reader does not need a schema as it's included in the data
diff --git a/avro/src/serde/ser_schema.rs b/avro/src/serde/ser_schema.rs
index 3ca05e1..084e1cd 100644
--- a/avro/src/serde/ser_schema.rs
+++ b/avro/src/serde/ser_schema.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! Logic for serde-compatible schema-aware serialization
-//! which writes directly to a `Write` stream
+//! Logic for serde-compatible schema-aware serialization which writes
directly to a writer.
use crate::schema::{DecimalSchema, InnerDecimalSchema, UuidSchema};
use crate::{
@@ -35,11 +34,11 @@ const COLLECTION_SERIALIZER_DEFAULT_INIT_ITEM_CAPACITY:
usize = 32;
const SINGLE_VALUE_INIT_BUFFER_SIZE: usize = 128;
/// The sequence serializer for [`SchemaAwareWriteSerializer`].
-/// [`SchemaAwareWriteSerializeSeq`] may break large arrays up into multiple
blocks to avoid having
+///
+/// This may break large arrays up into multiple blocks to avoid having
/// to obtain the length of the entire array before being able to write any
data to the underlying
-/// [`std::fmt::Write`] stream. (See the
+/// writer (see the [Data Serialization and Deserialization] section of the Avro spec for more info).
+///
+/// [Data Serialization and Deserialization]: https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization
-/// [Data Serialization and Deserialization](https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization)
-/// section of the Avro spec for more info.)
pub struct SchemaAwareWriteSerializeSeq<'a, 's, W: Write> {
ser: &'a mut SchemaAwareWriteSerializer<'s, W>,
item_schema: &'s Schema,
@@ -143,11 +142,11 @@ impl<W: Write> ser::SerializeTuple for
SchemaAwareWriteSerializeSeq<'_, '_, W> {
}
/// The map serializer for [`SchemaAwareWriteSerializer`].
-/// [`SchemaAwareWriteSerializeMap`] may break large maps up into multiple
blocks to avoid having to
-/// obtain the size of the entire map before being able to write any data to
the underlying
-/// [`std::fmt::Write`] stream. (See the
+///
+/// This may break large maps up into multiple blocks to avoid having to
obtain the size of the entire
+/// map before being able to write any data to the underlying writer
+/// (see the [Data Serialization and Deserialization] section of the Avro spec for more info).
+///
+/// [Data Serialization and Deserialization]: https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization
-/// [Data Serialization and Deserialization](https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization)
-/// section of the Avro spec for more info.)
pub struct SchemaAwareWriteSerializeMap<'a, 's, W: Write> {
ser: &'a mut SchemaAwareWriteSerializer<'s, W>,
item_schema: &'s Schema,
@@ -245,9 +244,9 @@ impl<W: Write> ser::SerializeMap for
SchemaAwareWriteSerializeMap<'_, '_, W> {
}
/// The struct serializer for [`SchemaAwareWriteSerializer`], which can
serialize Avro records.
-/// [`SchemaAwareWriteSerializeStruct`] can accept fields out of order, but
doing so incurs a
-/// performance penalty, since it requires [`SchemaAwareWriteSerializeStruct`]
to buffer serialized
-/// values in order to write them to the stream in order.
+///
+/// This can accept fields out of order, but doing so incurs a performance
penalty, since it requires
+/// buffering serialized values in order to write them to the stream in order.
pub struct SchemaAwareWriteSerializeStruct<'a, 's, W: Write> {
ser: &'a mut SchemaAwareWriteSerializer<'s, W>,
record_schema: &'s RecordSchema,
@@ -427,7 +426,10 @@ impl<W: Write> ser::SerializeStruct for
SchemaAwareWriteSerializeStruct<'_, '_,
}
}
-/// This implementation is used to support `#[serde(flatten)]` as that uses
SerializeMap instead of SerializeStruct.
+/// This implementation is used to support `#[serde(flatten)]` as that uses
[`SerializeMap`] instead of [`SerializeStruct`].
+///
+/// [`SerializeMap`]: ser::SerializeMap
+/// [`SerializeStruct`]: ser::SerializeStruct
impl<W: Write> ser::SerializeMap for SchemaAwareWriteSerializeStruct<'_, '_,
W> {
type Ok = usize;
type Error = Error;
@@ -531,7 +533,8 @@ impl<W: Write> ser::SerializeMap for
SchemaAwareWriteSerializeMapOrStruct<'_, '_
}
/// The tuple struct serializer for [`SchemaAwareWriteSerializer`].
-/// [`SchemaAwareWriteSerializeTupleStruct`] can serialize to an Avro array,
record, or big-decimal.
+///
+/// This can serialize to an Avro array, record, or big-decimal.
/// When serializing to a record, fields must be provided in the correct
order, since no names are provided.
pub enum SchemaAwareWriteSerializeTupleStruct<'a, 's, W: Write> {
Record(SchemaAwareWriteSerializeStruct<'a, 's, W>),
@@ -593,11 +596,11 @@ impl<W: Write> ser::SerializeTupleVariant for
SchemaAwareWriteSerializeTupleStru
}
}
-/// A [`serde::ser::Serializer`] implementation that serializes directly to a
[`std::fmt::Write`]
-/// using the provided schema. If [`SchemaAwareWriteSerializer`] isn't able
to match the incoming
-/// data with its schema, it will return an error.
-/// A [`SchemaAwareWriteSerializer`] instance can be re-used to serialize
multiple values matching
-/// the schema to its [`std::fmt::Write`] stream.
+/// A [`Serializer`](ser::Serializer) implementation that serializes directly
to raw Avro data.
+///
+/// If the data does not match the schema, it will return an error.
+///
+/// This does not keep state and can therefore be reused to write to the same
writer.
pub struct SchemaAwareWriteSerializer<'s, W: Write> {
writer: &'s mut W,
root_schema: &'s Schema,
diff --git a/avro/src/types.rs b/avro/src/types.rs
index 0060987..e10e4ed 100644
--- a/avro/src/types.rs
+++ b/avro/src/types.rs
@@ -240,11 +240,10 @@ impl Record<'_> {
}
}
- /// Put a compatible value (implementing the `ToAvro` trait) in the
- /// `Record` for a given `field` name.
+ /// Add a field to the `Record`.
///
- /// **NOTE** Only ensure that the field name is present in the `Schema`
given when creating
- /// this `Record`. Does not perform any schema validation.
+ // TODO: This should return an error or at least panic
+ /// **NOTE**: If the field name does not exist in the schema, the value is
silently dropped.
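+ ///
+ /// A minimal sketch of the current behavior:
+ /// ```
+ /// # use apache_avro::{Schema, types::{Record, Value}};
+ /// let schema = Schema::parse_str(r#"{"type": "record", "name": "test", "fields": [{"name": "a", "type": "long"}]}"#).unwrap();
+ /// let mut record = Record::new(&schema).unwrap();
+ /// record.put("a", 27i64);
+ /// record.put("b", "dropped"); // "b" is not in the schema
+ ///
+ /// assert_eq!(record.get("a"), Some(&Value::Long(27)));
+ /// assert_eq!(record.get("b"), None);
+ /// ```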
pub fn put<V>(&mut self, field: &str, value: V)
where
V: Into<Value>,
@@ -255,6 +254,7 @@ impl Record<'_> {
}
/// Get the value for a given field name.
+ ///
/// Returns `None` if the field is not present in the schema
pub fn get(&self, field: &str) -> Option<&Value> {
self.schema_lookup
diff --git a/avro/src/util.rs b/avro/src/util.rs
index 748e923..8acbfa4 100644
--- a/avro/src/util.rs
+++ b/avro/src/util.rs
@@ -24,20 +24,21 @@ use std::{
sync::OnceLock,
};
-/// Maximum number of bytes that can be allocated when decoding
-/// Avro-encoded values. This is a protection against ill-formed
-/// data, whose length field might be interpreted as enormous.
-/// See max_allocation_bytes to change this limit.
+/// Maximum number of bytes that can be allocated when decoding Avro-encoded
values.
+///
+/// This is a protection against ill-formed data, whose length field might be
interpreted as enormous.
+///
+/// See [`max_allocation_bytes`] to change this limit.
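+///
+/// A minimal sketch (assuming the crate-root re-export; the limit can only be
+/// set once, before any value is decoded):
+/// ```
+/// // Allow up to 1 GiB per allocation while decoding
+/// apache_avro::max_allocation_bytes(1024 * 1024 * 1024);
+/// ```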
pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024;
static MAX_ALLOCATION_BYTES: OnceLock<usize> = OnceLock::new();
-/// Whether to set serialization & deserialization traits
-/// as `human_readable` or not.
-/// See [set_serde_human_readable] to change this value.
+/// Whether to set serialization & deserialization traits as `human_readable`
or not.
+///
+/// See [`set_serde_human_readable`] to change this value.
+pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false;
+/// Whether the serializer and deserializer should indicate to types that the
format is human-readable.
// crate-visible for testing
pub(crate) static SERDE_HUMAN_READABLE: OnceLock<bool> = OnceLock::new();
-/// Whether the serializer and deserializer should indicate to types that the
format is human-readable.
-pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false;
pub(crate) trait MapHelper {
fn string(&self, key: &str) -> Option<String>;
diff --git a/avro/src/validator.rs b/avro/src/validator.rs
index 971870c..e8406a2 100644
--- a/avro/src/validator.rs
+++ b/avro/src/validator.rs
@@ -24,10 +24,12 @@ use std::sync::OnceLock;
struct SpecificationValidator;
/// A trait that validates schema names.
-/// To register a custom one use [set_schema_name_validator].
+///
+/// To register a custom one use [`set_schema_name_validator`].
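+///
+/// A minimal sketch of a custom validator (assuming the registration function
+/// takes a boxed trait object):
+/// ```ignore
+/// use apache_avro::{AvroResult, schema::Namespace, validator::{SchemaNameValidator, set_schema_name_validator}};
+///
+/// struct AcceptAnything;
+///
+/// impl SchemaNameValidator for AcceptAnything {
+///     fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
+///         // Accept any name, without splitting off a namespace
+///         Ok((schema_name.to_string(), None))
+///     }
+/// }
+///
+/// set_schema_name_validator(Box::new(AcceptAnything))
+///     .expect("a validator was already registered");
+/// ```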
pub trait SchemaNameValidator: Send + Sync {
- /// Returns the regex used to validate the schema name
- /// according to the Avro specification.
+ /// The regex used to validate the schema name.
+ ///
+ /// The default implementation uses the regex from the Avro specification.
fn regex(&self) -> &'static Regex {
static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
SCHEMA_NAME_ONCE.get_or_init(|| {
@@ -39,8 +41,9 @@ pub trait SchemaNameValidator: Send + Sync {
})
}
- /// Validates the schema name and returns the name and the optional
namespace,
- /// or [Details::InvalidSchemaName] if it is invalid.
+ /// Validates the schema name and returns the name and the optional
namespace.
+ ///
+ /// Should return [`Details::InvalidSchemaName`] if it is invalid.
fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>;
}
@@ -61,8 +64,7 @@ static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn
SchemaNameValidator + Send + Sync>>
/// Sets a custom schema name validator.
///
-/// Returns a unit if the registration was successful or the already
-/// registered validator if the registration failed.
+/// Returns `Err(validator)` if a validator is already configured.
///
/// **Note**: This function must be called before parsing any schema because
this will
/// register the default validator and the registration is one time only!
@@ -83,10 +85,12 @@ pub(crate) fn validate_schema_name(schema_name: &str) ->
AvroResult<(String, Nam
}
/// A trait that validates schema namespaces.
-/// To register a custom one use [set_schema_namespace_validator].
+///
+/// To register a custom one use [`set_schema_namespace_validator`].
pub trait SchemaNamespaceValidator: Send + Sync {
- /// Returns the regex used to validate the schema namespace
- /// according to the Avro specification.
+ /// The regex used to validate the schema namespace.
+ ///
+ /// The default implementation uses the regex from the Avro specification.
fn regex(&self) -> &'static Regex {
static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
NAMESPACE_ONCE.get_or_init(|| {
@@ -94,7 +98,9 @@ pub trait SchemaNamespaceValidator: Send + Sync {
})
}
- /// Validates the schema namespace or [Details::InvalidNamespace] if it is
invalid.
+ /// Validates a schema namespace.
+ ///
+ /// Should return [`Details::InvalidNamespace`] if it is invalid.
fn validate(&self, namespace: &str) -> AvroResult<()>;
}
@@ -114,8 +120,7 @@ static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn
SchemaNamespaceValidator + Sen
/// Sets a custom schema namespace validator.
///
-/// Returns a unit if the registration was successful or the already
-/// registered validator if the registration failed.
+/// Returns `Err(validator)` if a validator is already configured.
///
/// **Note**: This function must be called before parsing any schema because
this will
/// register the default validator and the registration is one time only!
@@ -135,17 +140,20 @@ pub(crate) fn validate_namespace(ns: &str) ->
AvroResult<()> {
}
/// A trait that validates enum symbol names.
-/// To register a custom one use [set_enum_symbol_name_validator].
+///
+/// To register a custom one use [`set_enum_symbol_name_validator`].
pub trait EnumSymbolNameValidator: Send + Sync {
- /// Returns the regex used to validate the symbols of enum schema
- /// according to the Avro specification.
+ /// The regex used to validate the symbols of enums.
+ ///
+ /// The default implementation uses the regex from the Avro specification.
fn regex(&self) -> &'static Regex {
static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
ENUM_SYMBOL_NAME_ONCE.get_or_init(||
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
}
- /// Validates the symbols of an Enum schema name and returns nothing
(unit),
- /// or [Details::EnumSymbolName] if it is invalid.
+ /// Validate the symbol of an enum.
+ ///
+ /// Should return [`Details::EnumSymbolName`] if it is invalid.
fn validate(&self, name: &str) -> AvroResult<()>;
}
@@ -165,8 +173,7 @@ static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn
EnumSymbolNameValidator
/// Sets a custom enum symbol name validator.
///
-/// Returns a unit if the registration was successful or the already
-/// registered validator if the registration failed.
+/// Returns `Err(validator)` if a validator is already configured.
///
/// **Note**: This function must be called before parsing any schema because
this will
/// register the default validator and the registration is one time only!
@@ -186,17 +193,20 @@ pub(crate) fn validate_enum_symbol_name(symbol: &str) ->
AvroResult<()> {
}
/// A trait that validates record field names.
-/// To register a custom one use [set_record_field_name_validator].
+///
+/// To register a custom one use [`set_record_field_name_validator`].
pub trait RecordFieldNameValidator: Send + Sync {
- /// Returns the regex used to validate the record field names
- /// according to the Avro specification.
+ /// The regex used to validate the record field names.
+ ///
+ /// The default implementation uses the regex from the Avro specification.
fn regex(&self) -> &'static Regex {
static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
FIELD_NAME_ONCE.get_or_init(||
Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
}
- /// Validates the record field's names and returns nothing (unit),
- /// or [Details::FieldName] if it is invalid.
+ /// Validate the name of a record field.
+ ///
+ /// Should return [`Details::FieldName`] if it is invalid.
fn validate(&self, name: &str) -> AvroResult<()>;
}
@@ -216,8 +226,7 @@ static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn
RecordFieldNameValidat
/// Sets a custom record field name validator.
///
-/// Returns a unit if the registration was successful or the already
-/// registered validator if the registration failed.
+/// Returns `Err(validator)` if a validator is already configured.
///
/// **Note**: This function must be called before parsing any schema because
this will
/// register the default validator and the registration is one time only!
diff --git a/avro/src/writer.rs b/avro/src/writer.rs
index 4ec219f..e28639f 100644
--- a/avro/src/writer.rs
+++ b/avro/src/writer.rs
@@ -172,8 +172,7 @@ impl<'a, W: Write> Writer<'a, W> {
self.schema
}
- /// Append a compatible value (implementing the `ToAvro` trait) to a
`Writer`, also performing
- /// schema validation.
+ /// Append a value to the `Writer`, also performing schema validation.
///
/// Returns the number of bytes written (it might be 0, see below).
///
@@ -187,7 +186,7 @@ impl<'a, W: Write> Writer<'a, W> {
self.append_value_ref(&avro).map(|m| m + n)
}
- /// Append a compatible value to a `Writer`, also performing schema
validation.
+ /// Append a compatible value to a `Writer`, also performing schema validation.
///
/// Returns the number of bytes written (it might be 0, see below).
///
@@ -235,8 +234,7 @@ impl<'a, W: Write> Writer<'a, W> {
Ok(n)
}
- /// Extend a `Writer` with an `Iterator` of compatible values
(implementing the `ToAvro`
- /// trait), also performing schema validation.
+ /// Extend a `Writer` with an `Iterator` of values, also performing schema validation.
///
/// Returns the number of bytes written.
///
@@ -499,8 +497,7 @@ impl<W: Write> Drop for Writer<'_, W> {
}
}
-/// Encode a compatible value (implementing the `ToAvro` trait) into Avro
format, also performing
-/// schema validation.
+/// Encode a value into raw Avro data, also performing schema validation.
///
/// This is an internal function which gets the bytes buffer where to write as
parameter instead of
/// creating a new one like `to_avro_datum`.
@@ -706,8 +703,7 @@ fn write_value_ref_owned_resolved(
Ok(())
}
-/// Encode a compatible value (implementing the `ToAvro` trait) into Avro
format, also
-/// performing schema validation.
+/// Encode a value into raw Avro data, also performing schema validation.
///
/// **NOTE**: This function has a quite small niche of usage and does NOT
generate headers and sync
/// markers; use [`Writer`] to be fully Avro-compatible if you don't know what
@@ -719,6 +715,7 @@ pub fn to_avro_datum<T: Into<Value>>(schema: &Schema,
value: T) -> AvroResult<Ve
}
/// Write the referenced [Serialize]able object to the provided [Write] object.
+///
/// Returns a result with the number of bytes written.
///
/// **NOTE**: This function has a quite small niche of usage and does **NOT**
generate headers and sync
@@ -735,8 +732,8 @@ pub fn write_avro_datum_ref<T: Serialize, W: Write>(
Ok(bytes_written)
}
-/// Encode a compatible value (implementing the `ToAvro` trait) into Avro
format, also
-/// performing schema validation.
+/// Encode a value into raw Avro data, also performing schema validation.
+///
/// If the provided `schema` is incomplete then its dependencies must be
/// provided in `schemata`
pub fn to_avro_datum_schemata<T: Into<Value>>(
diff --git a/avro_derive/build.rs b/avro_derive/build.rs
index 54691a8..40e33af 100644
--- a/avro_derive/build.rs
+++ b/avro_derive/build.rs
@@ -18,7 +18,7 @@
//! Set the `nightly` cfg value on nightly toolchains.
//!
//! We would prefer to just do `#![rustversion::attr(nightly,
feature(proc_macro_diagnostic)]`
-//! but that's currently not possible, see
https://github.com/dtolnay/rustversion/issues/8
+//! but that's currently not possible, see
<https://github.com/dtolnay/rustversion/issues/8>
#[rustversion::nightly]
fn main() {
diff --git a/avro_derive/src/case.rs b/avro_derive/src/case.rs
index b5d5c38..c958562 100644
--- a/avro_derive/src/case.rs
+++ b/avro_derive/src/case.rs
@@ -30,24 +30,24 @@ pub enum RenameRule {
/// Don't apply a default rename rule.
#[default]
None,
- /// Rename direct children to "lowercase" style.
+ /// Rename direct children to `lowercase` style.
LowerCase,
- /// Rename direct children to "UPPERCASE" style.
+ /// Rename direct children to `UPPERCASE` style.
UpperCase,
- /// Rename direct children to "PascalCase" style, as typically used for
+ /// Rename direct children to `PascalCase` style, as typically used for
/// enum variants.
PascalCase,
- /// Rename direct children to "camelCase" style.
+ /// Rename direct children to `camelCase` style.
CamelCase,
- /// Rename direct children to "snake_case" style, as commonly used for
+ /// Rename direct children to `snake_case` style, as commonly used for
/// fields.
SnakeCase,
- /// Rename direct children to "SCREAMING_SNAKE_CASE" style, as commonly
+ /// Rename direct children to `SCREAMING_SNAKE_CASE` style, as commonly
/// used for constants.
ScreamingSnakeCase,
- /// Rename direct children to "kebab-case" style.
+ /// Rename direct children to `kebab-case` style.
KebabCase,
- /// Rename direct children to "SCREAMING-KEBAB-CASE" style.
+ /// Rename direct children to `SCREAMING-KEBAB-CASE` style.
ScreamingKebabCase,
}
diff --git a/avro_test_helper/src/lib.rs b/avro_test_helper/src/lib.rs
index 6366a8e..9b5248d 100644
--- a/avro_test_helper/src/lib.rs
+++ b/avro_test_helper/src/lib.rs
@@ -53,7 +53,8 @@ fn after_all() {
#[derive(Debug)]
pub struct TestError;
-/// A converter of any error into [TestError].
+/// A converter of any error into [`TestError`].
+///
/// It is used to print better error messages in the tests.
/// Borrowed from
<https://bluxte.net/musings/2023/01/08/improving_failure_messages_rust_tests/>
// The Display bound is needed so that the `From` implementation doesn't