This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/documentation in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit ec18d63f4938d47d479a6f043edad9728b0416e1 Author: Kriskras99 <[email protected]> AuthorDate: Thu Jan 29 22:12:39 2026 +0100 docs: Continue working on moving documentation away from lib.rs and enable and fix some Clippy lints for documentation --- Cargo.toml | 5 ++ avro/src/documentation/mod.rs | 2 +- avro/src/documentation/primer.rs | 61 ++++++++++++++--- avro/src/duration.rs | 4 +- avro/src/error.rs | 4 +- avro/src/headers.rs | 25 ++++--- avro/src/lib.rs | 86 +++-------------------- avro/src/rabin.rs | 12 ++-- avro/src/reader.rs | 12 ++-- avro/src/schema/mod.rs | 46 ++++++++----- avro/src/schema/name.rs | 15 ++-- avro/src/schema/parser.rs | 42 +++++------- avro/src/schema/union.rs | 2 +- avro/src/schema_compatibility.rs | 12 ++-- avro/src/schema_equality.rs | 18 +++-- avro/src/serde/derive.rs | 144 +++++++++++++++++++++++++++------------ avro/src/serde/mod.rs | 11 ++- avro/src/serde/ser_schema.rs | 41 +++++------ avro/src/types.rs | 8 +-- avro/src/util.rs | 19 +++--- avro/src/validator.rs | 63 +++++++++-------- avro/src/writer.rs | 18 +++-- avro_derive/build.rs | 2 +- avro_derive/src/case.rs | 16 ++--- avro_test_helper/src/lib.rs | 3 +- 25 files changed, 359 insertions(+), 312 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ed46b50..184db25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,4 +54,9 @@ opt-level = "s" [workspace.lints] # By setting the priority to -1, this lint group can be overridden for specific lints clippy.all = { level = "warn", priority = -1 } +clippy.too_long_first_doc_paragraph = "warn" +clippy.doc_markdown = "warn" +# TODO: Needs more work +#clippy.missing_errors_doc = "warn" +#clippy.missing_panics_doc = "warn" rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(nightly)'] } diff --git a/avro/src/documentation/mod.rs b/avro/src/documentation/mod.rs index a7ff9b1..b138cf5 100644 --- a/avro/src/documentation/mod.rs +++ b/avro/src/documentation/mod.rs @@ -2,4 +2,4 @@ //! //! 
This module does not contain any code, and is only available during `rustdoc` builds. -pub mod primer; \ No newline at end of file +pub mod primer; diff --git a/avro/src/documentation/primer.rs b/avro/src/documentation/primer.rs index 918ede5..392df76 100644 --- a/avro/src/documentation/primer.rs +++ b/avro/src/documentation/primer.rs @@ -5,7 +5,7 @@ //! efficient. //! //! ## Schemas -//! +//! //! Schemas are defined in JSON and look like this: //! ```json //! { @@ -18,23 +18,62 @@ //! } //! ``` //! For all possible types and extra attributes, see [the schema section of the specification]. -//! +//! +//! Schemas can depend on each other. For example, the schema defined above can be used again or a +//! schema can include itself: +//! ```json +//! { +//! "type": "record", +//! "name": "references", +//! "fields": [ +//! {"name": "a", "type": "example"}, +//! {"name": "b", "type": "bytes"}, +//! {"name": "recursive", "type": ["null", "references"]} +//! ] +//! } +//! ``` +//! +//! Schemas are represented using the [`Schema`](crate::Schema) type. +//! //! [the schema section of the specification]: https://avro.apache.org/docs/++version++/specification/#schema-declaration -//! +//! //! ## File formats //! There are three official file formats for Avro. The data in these file formats is all encoded the same, but they differ //! in how the schema is included. -//! -//! ### [Object Container File](https://avro.apache.org/docs/++version++/specification/#object-container-files) +//! +//! #### [Object Container File](https://avro.apache.org/docs/++version++/specification/#object-container-files) //! This is the most common file format used for Avro. It includes the schema in the file, and can therefore be decoded by //! a reader who doesn't have the schema. It also supports including many records in one file. -//! -//! ### [Single Object Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding) +//! +//! 
This file format can be used via the [`Reader`](crate::Reader) and [`Writer`](crate::Writer) types. +//! +//! #### [Single Object Encoding](https://avro.apache.org/docs/++version++/specification/#single-object-encoding) //! In this file format, the schema is not included directly. It instead includes a fingerprint of the schema, which a reader -//! can lookup in a schema database or compare with the fingerprint that the reader is expecting. This file format always contains +//! can look up in a schema database or compare with the fingerprint that the reader is expecting. This file format always contains //! one record. -//! -//! ### Avro datums +//! +//! This file format can be used via the [`GenericSingleObjectReader`](crate::GenericSingleObjectReader), +//! [`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter), [`SpecificSingleObjectReader`](crate::SpecificSingleObjectReader), +//! and [`SpecificSingleObjectWriter`](crate::SpecificSingleObjectWriter) types. +//! +//! #### Avro datums //! This is not really a file format, as it's just the raw Avro encoded data. It does not include a schema and can therefore not be //! decoded without the reader knowing **exactly** which schema was used to write it. -//! +//! +//! This file format can be used via the [`to_avro_datum`](crate::to_avro_datum), [`from_avro_datum`](crate::from_avro_datum), +//! [`to_avro_datum_schemata`](crate::to_avro_datum_schemata), [`from_avro_datum_schemata`](crate::from_avro_datum_schemata), +//! [`from_avro_datum_reader_schemata`](crate::from_avro_datum_reader_schemata), and +//! [`write_avro_datum_ref`](crate::write_avro_datum_ref) functions. +//! +//! ## Compression +//! For records with low entropy it can be useful to compress the encoded data. Using the [Object Container File](#object-container-file) +//! this is directly possible in Avro. Avro supports various compression codecs: +//! +//! - deflate +//! - bzip2 +//! - Snappy +//! - XZ +//! - Zstandard +//! +//! 
All readers are required to implement the `deflate` codec, but most implementations implement most codecs. diff --git a/avro/src/duration.rs index cf24bc0..eecfca1 100644 --- a/avro/src/duration.rs +++ b/avro/src/duration.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -/// A struct representing duration that hides the details of endianness and conversion between -/// platform-native u32 and byte arrays. use serde::{Deserialize, Serialize, de}; +/// A struct representing duration that hides the details of endianness and conversion between +/// platform-native u32 and byte arrays. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Duration { months: Months, diff --git a/avro/src/error.rs index cbb00c0..4bd6a25 100644 --- a/avro/src/error.rs +++ b/avro/src/error.rs @@ -554,11 +554,11 @@ pub enum Details { #[error("Failed to convert JSON to string: {0}")] ConvertJsonToString(#[source] serde_json::Error), - /// Error while converting float to json value + /// Error while converting float to JSON value #[error("failed to convert avro float to json: {0}")] ConvertF64ToJson(f64), - /// Error while resolving Schema::Ref + /// Error while resolving [`Schema::Ref`] #[error("Unresolved schema reference: {0}")] SchemaResolutionError(Name), diff --git a/avro/src/headers.rs index dce134f..8a97f7b 100644 --- a/avro/src/headers.rs +++ b/avro/src/headers.rs @@ -20,17 +20,18 @@ use uuid::Uuid; use crate::{AvroResult, Schema, rabin::Rabin, schema::SchemaFingerprint}; -/// This trait represents that an object is able to construct an Avro message header. It is -/// implemented for some known header types already. If you need a header type that is not already -/// included here, then you can create your own struct and implement this trait. +/// This trait represents that an object is able to construct an Avro message header. 
+/// +/// It is implemented for some known header types already. If you need a header type that is not +/// already included here, then you can create your own struct and implement this trait. pub trait HeaderBuilder { fn build_header(&self) -> Vec<u8>; } -/// HeaderBuilder based on the Rabin schema fingerprint +/// [`HeaderBuilder`] based on the Rabin schema fingerprint. /// /// This is the default and will be used automatically by the `new` impls in -/// [crate::reader::GenericSingleObjectReader] and [crate::writer::GenericSingleObjectWriter]. +/// [`GenericSingleObjectReader`](crate::GenericSingleObjectReader) and [`GenericSingleObjectWriter`](crate::GenericSingleObjectWriter). pub struct RabinFingerprintHeader { fingerprint: SchemaFingerprint, } @@ -53,16 +54,15 @@ impl HeaderBuilder for RabinFingerprintHeader { } } -/// HeaderBuilder based on -/// [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html) schema UUID +/// [`HeaderBuilder`] for [Glue](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html). /// -/// See the function docs for usage details +/// See the function docs for usage details. pub struct GlueSchemaUuidHeader { schema_uuid: Uuid, } impl GlueSchemaUuidHeader { - /// Create an instance of the struct from a Glue Schema UUID + /// Create an instance of the struct from a Glue Schema UUID. /// /// Code for writing messages will most likely want to use this. You will need to determine /// via other means the correct Glue schema UUID and use it with this method to be able to @@ -72,12 +72,11 @@ impl GlueSchemaUuidHeader { } /// The minimum length of a Glue header. - /// 2 bytes for the special prefix (3, 0) plus - /// 16 bytes for the Uuid + /// + /// 2 bytes for the special prefix (3, 0) plus 16 bytes for the Uuid. 
const GLUE_HEADER_LENGTH: usize = 18; - /// Create an instance of the struct based on parsing the UUID out of the header of a raw - /// message + /// Create an instance of the struct based on parsing the UUID out of the header of a raw message /// /// Code for reading messages will most likely want to use this. Once you receive the raw bytes /// of a message, use this function to build the struct from it. That struct can then be used diff --git a/avro/src/lib.rs b/avro/src/lib.rs index 35c8ee5..53eff27 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -32,86 +32,17 @@ //! Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. //! //! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich -//! data structures and a compact, fast, binary data format. +//! data structures and a compact, fast, binary data format. If you are not familiar with the data +//! format, please read [`documentation::primer`] first. //! -//! If you are not familiar with the data format, please read [`documentation::primer`] first. +//! There are two ways of working with Avro data in this crate: //! -//! All data in Avro is schematized, as in the following example: -//! -//! ```json -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! ``` -//! -//! There are basically two ways of handling Avro data in Rust: -//! -//! * **as Avro-specialized data types** based on an Avro schema; -//! * **as generic Rust serde-compatible types** implementing/deriving `Serialize` and `Deserialize`; +//! * Via the generic [`Value`](types::Value) type, which allows for dynamically dealing with data at runtime. +//! * Via types implementing [`Serialize`](::serde::Serialize), [`Deserialize`](::serde::Deserialize), and [`AvroSchema`]. //! //! 
**apache-avro** provides a way to read and write both these data representations easily and //! efficiently. //! -//! # Defining a schema -//! -//! Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and -//! **can** be used while reading and they carry the information regarding the type of data we are -//! handling. Avro schemas are used for both schema validation and resolution of Avro data. -//! -//! Avro schemas are defined in JSON format and can just be parsed out of a raw string: -//! -//! ``` -//! use apache_avro::Schema; -//! -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! "#; -//! -//! let schema = Schema::parse_str(raw_schema).unwrap(); -//! ``` -//! -//! Additionally, a list of definitions (which may depend on each other) can be given and all of -//! them will be parsed into the corresponding schemas. -//! -//! ``` -//! use apache_avro::Schema; -//! -//! let raw_schema_1 = r#"{ -//! "name": "A", -//! "type": "record", -//! "fields": [ -//! {"name": "field_one", "type": "float"} -//! ] -//! }"#; -//! -//! // This definition depends on the definition of A above -//! let raw_schema_2 = r#"{ -//! "name": "B", -//! "type": "record", -//! "fields": [ -//! {"name": "field_one", "type": "A"} -//! ] -//! }"#; -//! -//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap(); -//! ``` -//! -//! For more information about schemas and what kind of information you can encapsulate in them, -//! please refer to the appropriate section of the -//! [Avro Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration). -//! //! # Writing data //! //! Once we have defined a schema, we are ready to serialize data in Avro, validating them against @@ -724,14 +655,13 @@ //! //! 
The library provides two implementations of schema equality comparators: //! 1. `SpecificationEq` - a comparator that serializes the schemas to their -//! canonical forms (i.e. JSON) and compares them as strings. It is the only implementation -//! until apache_avro 0.16.0. +//! canonical forms (i.e. JSON) and compares them as strings. //! See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas) //! for more information! //! 2. `StructFieldEq` - a comparator that compares the schemas structurally. //! It is faster than the `SpecificationEq` because it returns `false` as soon as a difference -//! is found and is recommended for use! -//! It is the default comparator since apache_avro 0.17.0. +//! is found and does not require encoding the schema to JSON. +//! It is the default comparator. //! //! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the //! `set_schemata_equality_comparator` function: diff --git a/avro/src/rabin.rs b/avro/src/rabin.rs index 2587a6d..497f476 100644 --- a/avro/src/rabin.rs +++ b/avro/src/rabin.rs @@ -43,12 +43,12 @@ fn fp_table() -> &'static [i64; 256] { }) } -/// Implementation of the Rabin fingerprint algorithm using the Digest trait as described in [schema_fingerprints](https://avro.apache.org/docs/current/specification/#schema-fingerprints). +/// Implementation of the Rabin fingerprint algorithm using the [`Digest`](digest::Digest) trait as described in [schema fingerprints]. /// /// The digest is returned as the 8-byte little-endian encoding of the Rabin hash. 
-/// This is what is used for avro [single object encoding](https://avro.apache.org/docs/current/specification/#single-object-encoding) +/// This is what is used for Avro [single object encoding] /// -/// ```rust +/// ``` /// use apache_avro::rabin::Rabin; /// use digest::Digest; /// use hex_literal::hex; @@ -65,9 +65,9 @@ fn fp_table() -> &'static [i64; 256] { /// assert_eq!(result[..], hex!("60335ba6d0415528")); /// ``` /// -/// To convert the digest to the commonly used 64-bit integer value, you can use the i64::from_le_bytes() function +/// To convert the digest to the commonly used 64-bit integer value, you can use the [`i64::from_le_bytes()`] function /// -/// ```rust +/// ``` /// # use apache_avro::rabin::Rabin; /// # use digest::Digest; /// # use hex_literal::hex; @@ -84,6 +84,8 @@ fn fp_table() -> &'static [i64; 256] { /// /// assert_eq!(i, 2906301498937520992) /// ``` +/// [single object encoding](https://avro.apache.org/docs/current/specification/#single-object-encoding) +/// [schema fingerprints](https://avro.apache.org/docs/current/specification/#schema-fingerprints) #[derive(Clone)] pub struct Rabin { result: i64, diff --git a/avro/src/reader.rs b/avro/src/reader.rs index 3c3b740..8af522c 100644 --- a/avro/src/reader.rs +++ b/avro/src/reader.rs @@ -459,12 +459,12 @@ pub fn from_avro_datum<R: Read>( } } -/// Decode a `Value` encoded in Avro format given the provided `Schema` and anything implementing `io::Read` -/// to read from. +/// Decode a `Value` from raw Avro data. +/// /// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided /// schemata to resolve any dependencies. /// -/// In case a reader `Schema` is provided, schema resolution will also be performed. +/// When a reader `Schema` is provided, schema resolution will also be performed. 
pub fn from_avro_datum_schemata<R: Read>( writer_schema: &Schema, writer_schemata: Vec<&Schema>, @@ -480,12 +480,12 @@ pub fn from_avro_datum_schemata<R: Read>( ) } -/// Decode a `Value` encoded in Avro format given the provided `Schema` and anything implementing `io::Read` -/// to read from. +/// Decode a `Value` from raw Avro data. +/// /// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided /// schemata to resolve any dependencies. /// -/// In case a reader `Schema` is provided, schema resolution will also be performed. +/// When a reader `Schema` is provided, schema resolution will also be performed. pub fn from_avro_datum_reader_schemata<R: Read>( writer_schema: &Schema, writer_schemata: Vec<&Schema>, diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 0ac4803..38ebac2 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -59,7 +59,8 @@ pub use crate::schema::{ /// Represents documentation for complex Avro schemas. pub type Documentation = Option<String>; -/// Represents an Avro schema fingerprint +/// Represents an Avro schema fingerprint. +/// /// More information about Avro schema fingerprints can be found in the /// [Avro Schema Fingerprint documentation](https://avro.apache.org/docs/current/specification/#schema-fingerprints) pub struct SchemaFingerprint { @@ -82,7 +83,7 @@ impl fmt::Display for SchemaFingerprint { /// Represents any valid Avro schema /// More information about Avro schemas can be found in the -/// [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration) +/// [Avro Specification](https://avro.apache.org/docs/++version++/specification/#schema-declaration) #[derive(Clone, Debug, EnumDiscriminants, Display)] #[strum_discriminants(name(SchemaKind), derive(Hash, Ord, PartialOrd))] pub enum Schema { @@ -99,17 +100,20 @@ pub enum Schema { /// A `double` Avro schema. Double, /// A `bytes` Avro schema. 
+ /// /// `Bytes` represents a sequence of 8-bit unsigned bytes. Bytes, /// A `string` Avro schema. + /// /// `String` represents a unicode character sequence. String, - /// A `array` Avro schema. Avro arrays are required to have the same type for each element. - /// This variant holds the `Schema` for the array element type. + /// A `array` Avro schema. + /// + /// All items will have the same schema. Array(ArraySchema), /// A `map` Avro schema. - /// `Map` holds a pointer to the `Schema` of its values, which must all be the same schema. - /// `Map` keys are assumed to be `string`. + /// + /// Keys are always a `Schema::String` and all values will have the same schema. Map(MapSchema), /// A `union` Avro schema. Union(UnionSchema), @@ -119,22 +123,27 @@ pub enum Schema { Enum(EnumSchema), /// A `fixed` Avro schema. Fixed(FixedSchema), - /// Logical type which represents `Decimal` values. The underlying type is serialized and - /// deserialized as `Schema::Bytes` or `Schema::Fixed`. + /// Logical type which represents `Decimal` values. + /// + /// The underlying type is serialized and deserialized as `Schema::Bytes` or `Schema::Fixed`. Decimal(DecimalSchema), /// Logical type which represents `Decimal` values without predefined scale. + /// /// The underlying type is serialized and deserialized as `Schema::Bytes` BigDecimal, /// A universally unique identifier, annotating a string, bytes or fixed. Uuid(UuidSchema), /// Logical type which represents the number of days since the unix epoch. + /// /// Serialization format is `Schema::Int`. Date, - /// The time of day in number of milliseconds after midnight with no reference any calendar, - /// time zone or date in particular. + /// The time of day in number of milliseconds after midnight. + /// + /// This type has no reference to any calendar, time zone or date in particular. 
TimeMillis, - /// The time of day in number of microseconds after midnight with no reference any calendar, - /// time zone or date in particular. + /// The time of day in number of microseconds after midnight. + /// + /// This type has no reference to any calendar, time zone or date in particular. TimeMicros, /// An instant in time represented as the number of milliseconds after the UNIX epoch. TimestampMillis, @@ -608,7 +617,7 @@ impl Schema { } } - /// Returns a Schema::Map with the given types. + /// Returns a `Schema::Map` with the given types. pub fn map(types: Schema) -> Self { Schema::Map(MapSchema { types: Box::new(types), @@ -616,7 +625,7 @@ impl Schema { }) } - /// Returns a Schema::Map with the given types and custom attributes. + /// Returns a `Schema::Map` with the given types and custom attributes. pub fn map_with_attributes(types: Schema, attributes: BTreeMap<String, Value>) -> Self { Schema::Map(MapSchema { types: Box::new(types), @@ -624,7 +633,7 @@ impl Schema { }) } - /// Returns a Schema::Array with the given items. + /// Returns a `Schema::Array` with the given items. pub fn array(items: Schema) -> Self { Schema::Array(ArraySchema { items: Box::new(items), @@ -632,7 +641,7 @@ impl Schema { }) } - /// Returns a Schema::Array with the given items and custom attributes. + /// Returns a `Schema::Array` with the given items and custom attributes. pub fn array_with_attributes(items: Schema, attributes: BTreeMap<String, Value>) -> Self { Schema::Array(ArraySchema { items: Box::new(items), @@ -903,8 +912,9 @@ impl Serialize for Schema { } } -/// Parses a **valid** avro schema into the Parsing Canonical Form. -/// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas +/// Parses a valid Avro schema into [the Parsing Canonical Form]. 
+/// +/// [the Parsing Canonical Form]: https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas fn parsing_canonical_form(schema: &Value, defined_names: &mut HashSet<String>) -> String { match schema { Value::Object(map) => pcf_map(map, defined_names), diff --git a/avro/src/schema/name.rs index 624443f..57c1cae 100644 --- a/avro/src/schema/name.rs +++ b/avro/src/schema/name.rs @@ -121,18 +121,19 @@ impl Name { } } - /// Return the fully qualified name needed for indexing or searching for the schema within a schema/schema env context. Puts the enclosing namespace into the name's namespace for clarity in schema/schema env parsing - /// ```ignore - /// use apache_avro::schema::Name; + /// Construct the fully qualified name /// + /// ``` + /// # use apache_avro::{Error, schema::Name}; /// assert_eq!( - /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), - /// Name::new("some_namespace.some_name")? + /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), + /// Name::new("some_namespace.some_name")? /// ); /// assert_eq!( - /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), - /// Name::new("some_namespace.some_name")? + /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), + /// Name::new("some_namespace.some_name")? /// ); + /// # Ok::<(), Error>(()) /// ``` pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> Name { Name { diff --git a/avro/src/schema/parser.rs index ced79bc..2f3f6a1 100644 --- a/avro/src/schema/parser.rs +++ b/avro/src/schema/parser.rs @@ -33,12 +33,10 @@ use std::collections::{BTreeMap, HashMap, HashSet}; #[derive(Default)] pub(crate) struct Parser { input_schemas: HashMap<Name, Value>, - /// A map of name -> Schema::Ref /// Used to resolve cyclic references, i.e. 
when a /// field's type is a reference to its record's type resolving_schemas: Names, input_order: Vec<Name>, - /// A map of name -> fully parsed Schema /// Used to avoid parsing the same schema twice parsed_schemas: Names, } @@ -67,8 +65,9 @@ impl Parser { self.parse(&value, &None) } - /// Create an array of `Schema`'s from an iterator of JSON Avro schemas. It is allowed that - /// the schemas have cross-dependencies; these will be resolved during parsing. + /// Create an array of `Schema`'s from an iterator of JSON Avro schemas. + /// + /// It is allowed that the schemas have cross-dependencies; these will be resolved during parsing. pub(super) fn parse_list(&mut self) -> AvroResult<Vec<Schema>> { self.parse_input_schemas()?; @@ -83,7 +82,7 @@ impl Parser { Ok(parsed_schemas) } - /// Convert the input schemas to parsed_schemas + /// Convert the input schemas to `parsed_schemas`. pub(super) fn parse_input_schemas(&mut self) -> Result<(), Error> { while !self.input_schemas.is_empty() { let next_name = self @@ -103,8 +102,7 @@ impl Parser { Ok(()) } - /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro - /// schema. + /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro schema. pub(super) fn parse( &mut self, value: &Value, @@ -120,9 +118,7 @@ impl Parser { } } - /// Parse a `serde_json::Value` representing an Avro type whose Schema is known into a - /// `Schema`. A Schema for a `serde_json::Value` is known if it is primitive or has - /// been parsed previously by the parsed and stored in its map of parsed_schemas. + /// Parse a string as a primitive type or reference to `parsed_schemas`. fn parse_known_schema( &mut self, name: &str, @@ -142,9 +138,10 @@ impl Parser { } /// Given a name, tries to retrieve the parsed schema from `parsed_schemas`. + /// /// If a parsed schema is not found, it checks if a currently resolving /// schema with that name exists. 
- /// If a resolving schema is not found, it checks if a json with that name exists + /// If a resolving schema is not found, it checks if a JSON with that name exists /// in `input_schemas` and then parses it (removing it from `input_schemas`) /// and adds the parsed schema to `parsed_schemas`. /// @@ -240,11 +237,10 @@ impl Parser { } } - /// Parse a `serde_json::Value` representing a complex Avro type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a complex Avro type into a `Schema`. /// /// Avro supports "recursive" definition of types. - /// e.g: {"type": {"type": "string"}} + /// e.g: `{"type": {"type": "string"}}` pub(super) fn parse_complex( &mut self, complex: &Map<String, Value>, @@ -539,8 +535,7 @@ impl Parser { } } - /// Parse a `serde_json::Value` representing a Avro record type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a Avro record type into a `Schema`. fn parse_record( &mut self, complex: &Map<String, Value>, @@ -619,8 +614,7 @@ impl Parser { custom_attributes } - /// Parse a `serde_json::Value` representing a Avro enum type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a Avro enum type into a `Schema`. fn parse_enum( &mut self, complex: &Map<String, Value>, @@ -697,8 +691,7 @@ impl Parser { Ok(schema) } - /// Parse a `serde_json::Value` representing a Avro array type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a Avro array type into a `Schema`. fn parse_array( &mut self, complex: &Map<String, Value>, @@ -716,8 +709,7 @@ impl Parser { }) } - /// Parse a `serde_json::Value` representing a Avro map type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a Avro map type into a `Schema`. fn parse_map( &mut self, complex: &Map<String, Value>, @@ -735,8 +727,7 @@ impl Parser { }) } - /// Parse a `serde_json::Value` representing a Avro union type into a - /// `Schema`. 
+ /// Parse a `serde_json::Value` representing a Avro union type into a `Schema`. fn parse_union( &mut self, items: &[Value], @@ -764,8 +755,7 @@ impl Parser { }) } - /// Parse a `serde_json::Value` representing a Avro fixed type into a - /// `Schema`. + /// Parse a `serde_json::Value` representing a Avro fixed type into a `Schema`. fn parse_fixed( &mut self, complex: &Map<String, Value>, diff --git a/avro/src/schema/union.rs b/avro/src/schema/union.rs index 5bf631a..7510a13 100644 --- a/avro/src/schema/union.rs +++ b/avro/src/schema/union.rs @@ -36,7 +36,7 @@ pub struct UnionSchema { } impl UnionSchema { - /// Creates a new UnionSchema from a vector of schemas. + /// Creates a new `UnionSchema` from a vector of schemas. /// /// # Errors /// Will return an error if `schemas` has duplicate unnamed schemas or if `schemas` diff --git a/avro/src/schema_compatibility.rs b/avro/src/schema_compatibility.rs index aa0ab41..d7994f2 100644 --- a/avro/src/schema_compatibility.rs +++ b/avro/src/schema_compatibility.rs @@ -20,7 +20,7 @@ //! To allow for schema evolution, Avro supports resolving the writer's schema to the reader's schema. //! To check if this is possible, [`SchemaCompatibility`] can be used. For the complete rules see //! [the specification](https://avro.apache.org/docs/++version++/specification/#schema-resolution). -//! +//! //! There are three levels of compatibility. //! //! 1. Fully compatible schemas (`Ok(Compatibility::Full)`) @@ -44,11 +44,11 @@ //! let readers_schema = Schema::array(Schema::Int); //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` -//! +//! //! 3. Partially compatible schemas (`Ok(Compatibility::Partial)`) //! //! For example, a union of a string and integer is only compatible with an integer if an integer was written: -//! +//! //! ```rust //! # use apache_avro::{Error, Schema, schema_compatibility::{Compatibility, SchemaCompatibility}}; //! 
let writers_schema = Schema::union(vec![Schema::Int, Schema::String])?;
@@ -56,7 +56,7 @@
 //! assert_eq!(SchemaCompatibility::can_read(&writers_schema, &readers_schema), Ok(Compatibility::Partial));
 //! # Ok::<(), Error>(())
 //! ```
-//! 
+//!
 use crate::{
     error::CompatibilityError,
     schema::{
@@ -73,9 +73,9 @@ use std::{
 };
 /// Check if two schemas can be resolved.
-/// 
+///
 /// See [the module documentation] for more details.
-/// 
+///
 /// [the module documentation]: crate::schema_compatibility
 pub struct SchemaCompatibility;
diff --git a/avro/src/schema_equality.rs b/avro/src/schema_equality.rs
index cd1c2cd..ecc793b 100644
--- a/avro/src/schema_equality.rs
+++ b/avro/src/schema_equality.rs
@@ -27,15 +27,16 @@ use log::debug;
 use std::{fmt::Debug, sync::OnceLock};
 /// A trait that compares two schemata for equality.
-/// To register a custom one use [set_schemata_equality_comparator].
+///
+/// To register a custom one use [`set_schemata_equality_comparator`].
 pub trait SchemataEq: Debug + Send + Sync {
     /// Compares two schemata for equality.
     fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool;
 }
-/// Compares two schemas according to the Avro specification by using
-/// their canonical forms.
-/// See <https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas>
+/// Compares two schemas according to the Avro specification by using [their canonical forms].
+///
+/// [their canonical forms]: https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas
 #[derive(Debug)]
 pub struct SpecificationEq;
 impl SchemataEq for SpecificationEq {
@@ -44,12 +45,15 @@ impl SchemataEq for SpecificationEq {
     }
 }
-/// Compares two schemas for equality field by field, using only the fields that
-/// are used to construct their canonical forms.
-/// See <https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas>
+/// Compares [the canonical forms] of two schemas for equality field by field.
+///
+/// This means that attributes like `aliases`, `doc`, `default` and `logicalType` are ignored.
+///
+/// [the canonical forms]: https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas
 #[derive(Debug)]
 pub struct StructFieldEq {
     /// Whether to include custom attributes in the comparison.
+    ///
     /// The custom attributes are not used to construct the canonical form of the schema!
     pub include_attributes: bool,
 }
diff --git a/avro/src/serde/derive.rs b/avro/src/serde/derive.rs
index 3a1dcbb..ebd054e 100644
--- a/avro/src/serde/derive.rs
+++ b/avro/src/serde/derive.rs
@@ -58,14 +58,14 @@ use std::collections::HashMap;
 /// This will implement [`AvroSchemaComponent`] for the type, and `AvroSchema`
 /// through the blanket implementation for `T: AvroSchemaComponent`.
 ///
-/// Every member of the `struct` and `enum` must also implement `AvroSchemaComponent`.
+/// When deriving `struct`s, every member must also implement `AvroSchemaComponent`.
 ///
 /// ## Changing the generated schema
 ///
 /// The derive macro will read both the `avro` and `serde` attributes to modify the generated schema.
 /// It will also check for compatibility between the various attributes.
 ///
-/// ### Container attributes
+/// #### Container attributes
 ///
 /// - `#[serde(rename = "name")]`
 ///
@@ -96,14 +96,14 @@ use std::collections::HashMap;
 ///   Use the schema of the inner field directly. Is only allowed on structs with only unskipped field.
 ///
 ///
-/// ### Variant attributes
+/// #### Variant attributes
 ///
 /// - `#[serde(rename = "name")]`
 ///
 ///   Rename the variant to the given name.
 ///
 ///
-/// ### Field attributes
+/// #### Field attributes
 ///
 /// - `#[serde(rename = "name")]`
 ///
@@ -146,7 +146,7 @@ use std::collections::HashMap;
 ///
 /// Override the schema used for this field. See [Working with foreign types](#working-with-foreign-types).
/// -/// ### Incompatible Serde attributes +/// #### Incompatible Serde attributes /// /// The derive macro is compatible with most Serde attributes, but it is incompatible with /// the following attributes: @@ -168,79 +168,137 @@ use std::collections::HashMap; /// ## Working with foreign types /// /// Most foreign types won't have a [`AvroSchema`] implementation. This crate implements it only -/// for built-in types, [`serde_json::Map`] and [`uuid::Uuid`]. Notable exceptions are [`char`] and -/// [`u64`] types, as there is no equivalent for char in Avro and the largest integer type in Avro -/// is `long` (equal to an [`i64`]). +/// for built-in types and [`uuid::Uuid`]. /// /// To still be able to derive schemas for fields of foreign types, the `#[avro(with)`] /// attribute can be used to get the schema for those fields. It can be used in two ways: /// /// 1. In combination with `#[serde(with = "path::to::module)]` /// -/// To get the schema, it will call the function `fn get_schema_in_ctxt(&mut Names, &Namespace) -> Schema` -/// in the module provided to the Serde attribute. +/// To get the schema, it will call the functions `fn get_schema_in_ctxt(&mut Names, &Namespace) -> Schema` +/// and `fn get_record_fields_in_ctxt(&mut Names, &Namespace) -> Schema` in the module provided +/// to the Serde attribute. /// /// 2. By providing a function directly, `#[avro(with = some_fn)]`. /// /// To get the schema, it will call the function provided. It must have the signature -/// `fn(&mut Names, &Namespace) -> Schema` +/// `fn(&mut Names, &Namespace) -> Schema`. When this is used for a `transparent` struct, the +/// default implementation of [`AvroSchemaComponent::get_record_fields_in_ctxt`] will be used +/// which is implemented with a lot of backtracking and cloning. /// pub trait AvroSchema { + /// Construct the full schema that represents this type. 
+ /// + /// The returned schema is fully independent and contains only `Schema::Ref` to named types defined + /// earlier in the schema. fn get_schema() -> Schema; } -/// Trait for types that serve as fully defined components inside an Avro data model. Derive -/// implementation available through `derive` feature. This is what is implemented by -/// the `derive(AvroSchema)` macro. +/// Trait for types that serve as fully defined components inside an Avro data model. /// -/// TODO: Explain that field ordering is important +/// This trait can be derived with [`#[derive(AvroSchema)]`](AvroSchema) when the `derive` feature is enabled. /// /// # Implementation guide /// -/// ### Simple implementation -/// To construct a non named simple schema, it is possible to ignore the input argument making the -/// general form implementation look like -/// ```ignore -/// impl AvroSchemaComponent for AType { +/// ### Implementation for returning primitive types +/// When the schema you want to return is a primitive type (a type without a name), the function +/// arguments can be ignored. +/// +/// For example, you have a custom integer type: +/// ``` +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names, Namespace, RecordField}}; +/// // Make sure to implement `Serialize` and `Deserialize` to use the right serialization methods +/// pub struct U24([u8; 3]); +/// impl AvroSchemaComponent for U24 { /// fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { -/// Schema::? 
-///     }
+///         Schema::Int
+///     }
+///
+///     fn get_record_fields_in_ctxt(_: &mut Names, _: &Namespace) -> Option<Vec<RecordField>> {
+///         None // A Schema::Int is not a Schema::Record so there are no fields to return
+///     }
 ///}
 /// ```
 ///
 /// ### Passthrough implementation
 ///
-/// To construct a schema for a Type that acts as in "inner" type, such as for smart pointers, simply
-/// pass through the arguments to the inner type
-/// ```ignore
-/// impl AvroSchemaComponent for PassthroughType {
+/// To construct a schema for a type that is "transparent", such as for smart pointers, simply
+/// pass through the arguments to the inner type:
+/// ```
+/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Names, Namespace, RecordField}};
+/// # use serde::{Serialize, Deserialize};
+/// #[derive(Serialize, Deserialize)]
+/// #[serde(transparent)] // This attribute is important for all passthrough implementations!
+/// pub struct Transparent<T>(T);
+/// impl<T: AvroSchemaComponent> AvroSchemaComponent for Transparent<T> {
 ///     fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema {
-///         InnerType::get_schema_in_ctxt(named_schemas, enclosing_namespace)
-///     }
+///         T::get_schema_in_ctxt(named_schemas, enclosing_namespace)
+///     }
+///
+///     fn get_record_fields_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> {
+///         T::get_record_fields_in_ctxt(named_schemas, enclosing_namespace)
+///     }
 ///}
 /// ```
 ///
-/// ### Complex implementation
+/// ### Implementation for complex types
+/// When the schema you want to return is a complex type (a type with a name), special care has to
+/// be taken to avoid duplicate type definitions and getting the correct namespace.
+/// +/// Things to keep in mind: +/// - If the fully qualified name already exists, return a [`Schema::Ref`] +/// - Use the `AvroSchemaComponent` implementations to get the schemas for the subtypes +/// - The ordering of fields in the schema **must** match with the ordering in Serde +/// - Implement `get_record_fields_in_ctxt` as the default implementation has to be implemented +/// with backtracking and a lot of cloning. +/// - Even if your schema is not a record, still implement the function and just return `None` +/// +/// ```rust +/// # use apache_avro::{Schema, serde::{AvroSchemaComponent}, schema::{Name, Names, Namespace, RecordField, RecordSchema}}; +/// # use serde::{Serialize, Deserialize}; +/// # use std::time::Duration; +/// pub struct Foo { +/// one: String, +/// two: i32, +/// three: Option<Duration> +/// } /// -/// To implement this for Named schema there is a general form needed to avoid creating invalid -/// schemas or infinite loops. -/// ```ignore -/// impl AvroSchemaComponent for ComplexType { +/// impl AvroSchemaComponent for Foo { /// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { /// // Create the fully qualified name for your type given the enclosing namespace -/// let name = apache_avro::schema::Name::new("MyName") -/// .expect("Unable to parse schema name") -/// .fully_qualified_name(enclosing_namespace); -/// let enclosing_namespace = &name.namespace; -/// // Check, if your name is already defined, and if so, return a ref to that name +/// let name = Name::new("Foo").unwrap().fully_qualified_name(enclosing_namespace); /// if named_schemas.contains_key(&name) { -/// apache_avro::schema::Schema::Ref{name: name.clone()} +/// Schema::Ref { name } /// } else { -/// named_schemas.insert(name.clone(), apache_avro::schema::Schema::Ref{name: name.clone()}); -/// // YOUR SCHEMA DEFINITION HERE with the name equivalent to "MyName". 
-/// // For non-simple sub types delegate to their implementation of AvroSchemaComponent +/// let enclosing_namespace = &name.namespace; +/// // This is needed because otherwise recursive types will recurse forever and cause a stack overflow +/// named_schemas.insert(name.clone(), Schema::Ref { name: name.clone() }); +/// let schema = Schema::Record(RecordSchema::builder() +/// .name(name.clone()) +/// .fields(Self::get_record_fields_in_ctxt(named_schemas, enclosing_namespace).expect("Impossible!")) +/// .build() +/// ); +/// named_schemas.insert(name, schema.clone()); +/// schema /// } -/// } +/// } +/// +/// fn get_record_fields_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Option<Vec<RecordField>> { +/// Some(vec![ +/// RecordField::builder() +/// .name("one") +/// .schema(String::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .build(), +/// RecordField::builder() +/// .name("two") +/// .schema(i32::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .build(), +/// RecordField::builder() +/// .name("three") +/// .schema(<Option<Duration>>::get_schema_in_ctxt(named_schemas, enclosing_namespace)) +/// .build(), +/// ]) +/// } ///} /// ``` pub trait AvroSchemaComponent { diff --git a/avro/src/serde/mod.rs b/avro/src/serde/mod.rs index a2d8377..02cada6 100644 --- a/avro/src/serde/mod.rs +++ b/avro/src/serde/mod.rs @@ -41,7 +41,6 @@ //! # use std::io::Cursor; //! # use serde::{Serialize, Deserialize}; //! # use apache_avro::{AvroSchema, Error, Reader, Writer, serde::{from_value, to_value}}; -//! //! #[derive(AvroSchema, Serialize, Deserialize, PartialEq, Debug)] //! struct Foo { //! a: i64, @@ -57,14 +56,12 @@ //! b: "Hello".to_string(), //! }; //! -//! // There are two ways to serialize data. -//! // 1: Serialize directly to the writer: +//! // Serialize as many items as you want. //! writer.append_ser(&foo)?; -//! // 2: First serialize to an Avro `Value` then write that: -//! let foo_value = to_value(&foo)?; -//! 
writer.append(foo_value)?; //! -//! // Always flush or consume the writer +//! // Always flush +//! writer.flush(); +//! // Or consume the writer //! let data = writer.into_inner()?; //! //! // The reader does not need a schema as it's included in the data diff --git a/avro/src/serde/ser_schema.rs b/avro/src/serde/ser_schema.rs index 2cfe380..63ecdad 100644 --- a/avro/src/serde/ser_schema.rs +++ b/avro/src/serde/ser_schema.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Logic for serde-compatible schema-aware serialization -//! which writes directly to a `Write` stream +//! Logic for serde-compatible schema-aware serialization which writes directly to a writer. use crate::schema::{DecimalSchema, InnerDecimalSchema, UuidSchema}; use crate::{ @@ -35,11 +34,11 @@ const COLLECTION_SERIALIZER_DEFAULT_INIT_ITEM_CAPACITY: usize = 32; const SINGLE_VALUE_INIT_BUFFER_SIZE: usize = 128; /// The sequence serializer for [`SchemaAwareWriteSerializer`]. -/// [`SchemaAwareWriteSerializeSeq`] may break large arrays up into multiple blocks to avoid having +/// +/// This may break large arrays up into multiple blocks to avoid having /// to obtain the length of the entire array before being able to write any data to the underlying -/// [`std::fmt::Write`] stream. (See the +/// writer (see the [Data Serialization and Deserialization] for more info). /// [Data Serialization and Deserialization](https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization) -/// section of the Avro spec for more info.) pub struct SchemaAwareWriteSerializeSeq<'a, 's, W: Write> { ser: &'a mut SchemaAwareWriteSerializer<'s, W>, item_schema: &'s Schema, @@ -143,11 +142,11 @@ impl<W: Write> ser::SerializeTuple for SchemaAwareWriteSerializeSeq<'_, '_, W> { } /// The map serializer for [`SchemaAwareWriteSerializer`]. 
-/// [`SchemaAwareWriteSerializeMap`] may break large maps up into multiple blocks to avoid having to -/// obtain the size of the entire map before being able to write any data to the underlying -/// [`std::fmt::Write`] stream. (See the +/// +/// This may break large maps up into multiple blocks to avoid having to obtain the size of the entire +/// map before being able to write any data to the underlying writer +/// (see [Data Serialization and Deserialization] for more info) /// [Data Serialization and Deserialization](https://avro.apache.org/docs/1.12.0/specification/#data-serialization-and-deserialization) -/// section of the Avro spec for more info.) pub struct SchemaAwareWriteSerializeMap<'a, 's, W: Write> { ser: &'a mut SchemaAwareWriteSerializer<'s, W>, item_schema: &'s Schema, @@ -245,9 +244,9 @@ impl<W: Write> ser::SerializeMap for SchemaAwareWriteSerializeMap<'_, '_, W> { } /// The struct serializer for [`SchemaAwareWriteSerializer`], which can serialize Avro records. -/// [`SchemaAwareWriteSerializeStruct`] can accept fields out of order, but doing so incurs a -/// performance penalty, since it requires [`SchemaAwareWriteSerializeStruct`] to buffer serialized -/// values in order to write them to the stream in order. +/// +/// This can accept fields out of order, but doing so incurs a performance penalty, since it requires +/// buffering serialized values in order to write them to the stream in order. pub struct SchemaAwareWriteSerializeStruct<'a, 's, W: Write> { ser: &'a mut SchemaAwareWriteSerializer<'s, W>, record_schema: &'s RecordSchema, @@ -427,7 +426,10 @@ impl<W: Write> ser::SerializeStruct for SchemaAwareWriteSerializeStruct<'_, '_, } } -/// This implementation is used to support `#[serde(flatten)]` as that uses SerializeMap instead of SerializeStruct. +/// This implementation is used to support `#[serde(flatten)]` as that uses [`SerializeMap`] instead of [`SerializeStruct`]. 
+///
+/// [`SerializeMap`]: ser::SerializeMap
+/// [`SerializeStruct`]: ser::SerializeStruct
 impl<W: Write> ser::SerializeMap for SchemaAwareWriteSerializeStruct<'_, '_, W> {
     type Ok = usize;
     type Error = Error;
@@ -531,7 +533,8 @@ impl<W: Write> ser::SerializeMap for SchemaAwareWriteSerializeMapOrStruct<'_, '_
 }
 
 /// The tuple struct serializer for [`SchemaAwareWriteSerializer`].
-/// [`SchemaAwareWriteSerializeTupleStruct`] can serialize to an Avro array, record, or big-decimal.
+///
+/// This can serialize to an Avro array, record, or big-decimal.
 /// When serializing to a record, fields must be provided in the correct order, since no names are provided.
 pub enum SchemaAwareWriteSerializeTupleStruct<'a, 's, W: Write> {
     Record(SchemaAwareWriteSerializeStruct<'a, 's, W>),
@@ -593,11 +596,11 @@ impl<W: Write> ser::SerializeTupleVariant for SchemaAwareWriteSerializeTupleStru
     }
 }
 
-/// A [`serde::ser::Serializer`] implementation that serializes directly to a [`std::fmt::Write`]
-/// using the provided schema. If [`SchemaAwareWriteSerializer`] isn't able to match the incoming
-/// data with its schema, it will return an error.
-/// A [`SchemaAwareWriteSerializer`] instance can be re-used to serialize multiple values matching
-/// the schema to its [`std::fmt::Write`] stream.
+/// A [`Serializer`](ser::Serializer) implementation that serializes directly to raw Avro data.
+///
+/// If data does not match with the schema it will return an error.
+///
+/// This does not keep state and can therefore be reused to write to the same writer.
 pub struct SchemaAwareWriteSerializer<'s, W: Write> {
     writer: &'s mut W,
     root_schema: &'s Schema,
diff --git a/avro/src/types.rs b/avro/src/types.rs
index e375f71..e33fd84 100644
--- a/avro/src/types.rs
+++ b/avro/src/types.rs
@@ -240,11 +240,10 @@ impl Record<'_> {
         }
     }
 
-    /// Put a compatible value (implementing the `ToAvro` trait) in the
-    /// `Record` for a given `field` name.
+    /// Add a field to the `Record`.
    ///
-    /// **NOTE** Only ensure that the field name is present in the `Schema` given when creating
-    /// this `Record`. Does not perform any schema validation.
+    // TODO: This should return an error or at least panic
+    /// **NOTE**: If the field name does not exist in the schema, the value is silently dropped.
     pub fn put<V>(&mut self, field: &str, value: V)
     where
         V: Into<Value>,
@@ -255,6 +254,7 @@ impl Record<'_> {
     }
 
     /// Get the value for a given field name.
+    ///
     /// Returns `None` if the field is not present in the schema
     pub fn get(&self, field: &str) -> Option<&Value> {
         self.schema_lookup
diff --git a/avro/src/util.rs b/avro/src/util.rs
index 748e923..8acbfa4 100644
--- a/avro/src/util.rs
+++ b/avro/src/util.rs
@@ -24,20 +24,21 @@ use std::{
     sync::OnceLock,
 };
 
-/// Maximum number of bytes that can be allocated when decoding
-/// Avro-encoded values. This is a protection against ill-formed
-/// data, whose length field might be interpreted as enormous.
-/// See max_allocation_bytes to change this limit.
+/// Maximum number of bytes that can be allocated when decoding Avro-encoded values.
+///
+/// This is a protection against ill-formed data, whose length field might be interpreted as enormous.
+///
+/// See [`max_allocation_bytes`] to change this limit.
 pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024;
 static MAX_ALLOCATION_BYTES: OnceLock<usize> = OnceLock::new();
 
-/// Whether to set serialization & deserialization traits
-/// as `human_readable` or not.
-/// See [set_serde_human_readable] to change this value.
+/// Whether to set serialization & deserialization traits as `human_readable` or not.
+///
+/// See [`set_serde_human_readable`] to change this value.
+pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false;
+/// Whether the serializer and deserializer should indicate to types that the format is human-readable.
// crate-visible for testing pub(crate) static SERDE_HUMAN_READABLE: OnceLock<bool> = OnceLock::new(); -/// Whether the serializer and deserializer should indicate to types that the format is human-readable. -pub const DEFAULT_SERDE_HUMAN_READABLE: bool = false; pub(crate) trait MapHelper { fn string(&self, key: &str) -> Option<String>; diff --git a/avro/src/validator.rs b/avro/src/validator.rs index 971870c..e8406a2 100644 --- a/avro/src/validator.rs +++ b/avro/src/validator.rs @@ -24,10 +24,12 @@ use std::sync::OnceLock; struct SpecificationValidator; /// A trait that validates schema names. -/// To register a custom one use [set_schema_name_validator]. +/// +/// To register a custom one use [`set_schema_name_validator`]. pub trait SchemaNameValidator: Send + Sync { - /// Returns the regex used to validate the schema name - /// according to the Avro specification. + /// The regex used to validate the schema name. + /// + /// The default implementation uses the Avro specified regex. fn regex(&self) -> &'static Regex { static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); SCHEMA_NAME_ONCE.get_or_init(|| { @@ -39,8 +41,9 @@ pub trait SchemaNameValidator: Send + Sync { }) } - /// Validates the schema name and returns the name and the optional namespace, - /// or [Details::InvalidSchemaName] if it is invalid. + /// Validates the schema name and returns the name and the optional namespace. + /// + /// Should return [`Details::InvalidSchemaName`] if it is invalid. fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>; } @@ -61,8 +64,7 @@ static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> /// Sets a custom schema name validator. /// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. +/// Returns `Err(validator)` if a validator is already configured. 
/// /// **Note**: This function must be called before parsing any schema because this will /// register the default validator and the registration is one time only! @@ -83,10 +85,12 @@ pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Nam } /// A trait that validates schema namespaces. -/// To register a custom one use [set_schema_namespace_validator]. +/// +/// To register a custom one use [`set_schema_namespace_validator`]. pub trait SchemaNamespaceValidator: Send + Sync { - /// Returns the regex used to validate the schema namespace - /// according to the Avro specification. + /// The regex used to validate the schema namespace. + /// + /// The default implementation uses the Avro specified regex. fn regex(&self) -> &'static Regex { static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new(); NAMESPACE_ONCE.get_or_init(|| { @@ -94,7 +98,9 @@ pub trait SchemaNamespaceValidator: Send + Sync { }) } - /// Validates the schema namespace or [Details::InvalidNamespace] if it is invalid. + /// Validates a schema namespace. + /// + /// Should return [`Details::InvalidNamespace`] if it is invalid. fn validate(&self, namespace: &str) -> AvroResult<()>; } @@ -114,8 +120,7 @@ static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Sen /// Sets a custom schema namespace validator. /// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. +/// Returns `Err(validator)` if a validator is already configured. /// /// **Note**: This function must be called before parsing any schema because this will /// register the default validator and the registration is one time only! @@ -135,17 +140,20 @@ pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> { } /// A trait that validates enum symbol names. -/// To register a custom one use [set_enum_symbol_name_validator]. +/// +/// To register a custom one use [`set_enum_symbol_name_validator`]. 
pub trait EnumSymbolNameValidator: Send + Sync { - /// Returns the regex used to validate the symbols of enum schema - /// according to the Avro specification. + /// The regex used to validate the symbols of enums. + /// + /// The default implementation uses the Avro specified regex. fn regex(&self) -> &'static Regex { static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } - /// Validates the symbols of an Enum schema name and returns nothing (unit), - /// or [Details::EnumSymbolName] if it is invalid. + /// Validate the symbol of an enum. + /// + /// Should return [`Details::EnumSymbolName`] if it is invalid. fn validate(&self, name: &str) -> AvroResult<()>; } @@ -165,8 +173,7 @@ static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator /// Sets a custom enum symbol name validator. /// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. +/// Returns `Err(validator)` if a validator is already configured. /// /// **Note**: This function must be called before parsing any schema because this will /// register the default validator and the registration is one time only! @@ -186,17 +193,20 @@ pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> { } /// A trait that validates record field names. -/// To register a custom one use [set_record_field_name_validator]. +/// +/// To register a custom one use [`set_record_field_name_validator`]. pub trait RecordFieldNameValidator: Send + Sync { - /// Returns the regex used to validate the record field names - /// according to the Avro specification. + /// The regex used to validate the record field names. + /// + /// The default implementation uses the Avro specified regex. 
fn regex(&self) -> &'static Regex { static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new(); FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } - /// Validates the record field's names and returns nothing (unit), - /// or [Details::FieldName] if it is invalid. + /// Validate the name of a record field. + /// + /// Should return [`Details::FieldName`] if it is invalid. fn validate(&self, name: &str) -> AvroResult<()>; } @@ -216,8 +226,7 @@ static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidat /// Sets a custom record field name validator. /// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. +/// Returns `Err(validator)` if a validator is already configured. /// /// **Note**: This function must be called before parsing any schema because this will /// register the default validator and the registration is one time only! diff --git a/avro/src/writer.rs b/avro/src/writer.rs index 32808d1..1690259 100644 --- a/avro/src/writer.rs +++ b/avro/src/writer.rs @@ -178,7 +178,7 @@ impl<'a, W: Write> Writer<'a, W> { self.append_value(value) } - /// Append a compatible value to a `Writer`, also performing schema validation. + /// Append a value to the `Writer`, also performs schema validation. /// /// Returns the number of bytes written (it might be 0, see below). /// @@ -190,7 +190,7 @@ impl<'a, W: Write> Writer<'a, W> { self.append_value_ref(&avro) } - /// Append a compatible value to a `Writer`, also performing schema validation. + /// Append a compatible value to a `Writer`, also performs schema validation. /// /// Returns the number of bytes written (it might be 0, see below). /// @@ -287,8 +287,7 @@ impl<'a, W: Write> Writer<'a, W> { Ok(n) } - /// Extend a `Writer` with an `Iterator` of compatible values (implementing the `ToAvro` - /// trait), also performing schema validation. 
+ /// Extend a `Writer` with an `Iterator` of values, also performs schema validation. /// /// Returns the number of bytes written. /// @@ -551,8 +550,7 @@ impl<W: Write> Drop for Writer<'_, W> { } } -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also performing -/// schema validation. +/// Encode a value into raw Avro data, also performs schema validation. /// /// This is an internal function which gets the bytes buffer where to write as parameter instead of /// creating a new one like `to_avro_datum`. @@ -767,8 +765,7 @@ fn write_value_ref_owned_resolved<W: Write>( ) } -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also -/// performing schema validation. +/// Encode a value into raw Avro data, also performs schema validation. /// /// **NOTE**: This function has a quite small niche of usage and does NOT generate headers and sync /// markers; use [`Writer`] to be fully Avro-compatible if you don't know what @@ -780,6 +777,7 @@ pub fn to_avro_datum<T: Into<Value>>(schema: &Schema, value: T) -> AvroResult<Ve } /// Write the referenced [Serialize]able object to the provided [Write] object. +/// /// Returns a result with the number of bytes written. /// /// **NOTE**: This function has a quite small niche of usage and does **NOT** generate headers and sync @@ -795,8 +793,8 @@ pub fn write_avro_datum_ref<T: Serialize, W: Write>( data.serialize(&mut serializer) } -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also -/// performing schema validation. +/// Encode a value into raw Avro data, also performs schema validation. +/// /// If the provided `schema` is incomplete then its dependencies must be /// provided in `schemata` pub fn to_avro_datum_schemata<T: Into<Value>>( diff --git a/avro_derive/build.rs b/avro_derive/build.rs index 54691a8..40e33af 100644 --- a/avro_derive/build.rs +++ b/avro_derive/build.rs @@ -18,7 +18,7 @@ //! 
Set the `nightly` cfg value on nightly toolchains. //! //! We would prefer to just do `#![rustversion::attr(nightly, feature(proc_macro_diagnostic)]` -//! but that's currently not possible, see https://github.com/dtolnay/rustversion/issues/8 +//! but that's currently not possible, see <https://github.com/dtolnay/rustversion/issues/8> #[rustversion::nightly] fn main() { diff --git a/avro_derive/src/case.rs b/avro_derive/src/case.rs index b5d5c38..c958562 100644 --- a/avro_derive/src/case.rs +++ b/avro_derive/src/case.rs @@ -30,24 +30,24 @@ pub enum RenameRule { /// Don't apply a default rename rule. #[default] None, - /// Rename direct children to "lowercase" style. + /// Rename direct children to `lowercase` style. LowerCase, - /// Rename direct children to "UPPERCASE" style. + /// Rename direct children to `UPPERCASE` style. UpperCase, - /// Rename direct children to "PascalCase" style, as typically used for + /// Rename direct children to `PascalCase` style, as typically used for /// enum variants. PascalCase, - /// Rename direct children to "camelCase" style. + /// Rename direct children to `camelCase` style. CamelCase, - /// Rename direct children to "snake_case" style, as commonly used for + /// Rename direct children to `snake_case` style, as commonly used for /// fields. SnakeCase, - /// Rename direct children to "SCREAMING_SNAKE_CASE" style, as commonly + /// Rename direct children to `SCREAMING_SNAKE_CASE` style, as commonly /// used for constants. ScreamingSnakeCase, - /// Rename direct children to "kebab-case" style. + /// Rename direct children to `kebab-case` style. KebabCase, - /// Rename direct children to "SCREAMING-KEBAB-CASE" style. + /// Rename direct children to `SCREAMING-KEBAB-CASE` style. 
ScreamingKebabCase, } diff --git a/avro_test_helper/src/lib.rs b/avro_test_helper/src/lib.rs index 6366a8e..9b5248d 100644 --- a/avro_test_helper/src/lib.rs +++ b/avro_test_helper/src/lib.rs @@ -53,7 +53,8 @@ fn after_all() { #[derive(Debug)] pub struct TestError; -/// A converter of any error into [TestError]. +/// A converter of any error into [`TestError`]. +/// /// It is used to print better error messages in the tests. /// Borrowed from <https://bluxte.net/musings/2023/01/08/improving_failure_messages_rust_tests/> // The Display bound is needed so that the `From` implementation doesn't
