This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 68a4f2e  feat: Add settings for Codec::Deflate  (#174)
68a4f2e is described below

commit 68a4f2e871fdbc918ec8ae3de83a6be93d77e932
Author: Martin Grigorov <[email protected]>
AuthorDate: Wed Apr 9 08:26:19 2025 +0300

    feat: Add settings for Codec::Deflate  (#174)
    
    * Issue #169 - Be explicit about the used features of miniz_oxide
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #169 - Do not allow unused imports. Import only what is needed when it is needed
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Fixes #173 - Introduce DeflateSettings
    
    At the moment it could be used to specify the compression level for
    Codec::Deflate
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #173 - Use non-default compression level for Codec::Deflate
    
    Add some Rustdoc
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #173 - Update the README.md with `cargo rdme`
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #173 - Fix Rustdoc imports
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #173 - Introduce a helper method for the cast of CompressionLevel to u8
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    * Issue #173 - Further rustdoc import fixes
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
    
    ---------
    
    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
 avro/Cargo.toml      |  2 +-
 avro/README.md       | 13 ++++++-------
 avro/src/codec.rs    | 54 +++++++++++++++++++++++++++++++++++++++++++---------
 avro/src/lib.rs      | 16 +++++++---------
 avro/src/writer.rs   | 13 +++++++++----
 avro/tests/codecs.rs |  7 +++++--
 6 files changed, 73 insertions(+), 32 deletions(-)

diff --git a/avro/Cargo.toml b/avro/Cargo.toml
index 4064817..eae7c0b 100644
--- a/avro/Cargo.toml
+++ b/avro/Cargo.toml
@@ -59,7 +59,7 @@ bon = { default-features = false, version = "3.5.1" }
 bzip2 = { version = "0.5.2", optional = true }
 crc32fast = { default-features = false, version = "1.4.2", optional = true }
 digest = { default-features = false, version = "0.10.7", features = 
["core-api"] }
-miniz_oxide = "0.8.7"
+miniz_oxide = { default-features = false, version = "0.8.7", features = 
["with-alloc"] }
 log = { workspace = true }
 num-bigint = { default-features = false, version = "0.4.6", features = ["std", 
"serde"] }
 regex-lite = { default-features = false, version = "0.1.6", features = ["std", 
"string"] }
diff --git a/avro/README.md b/avro/README.md
index 7598a75..a84e556 100644
--- a/avro/README.md
+++ b/avro/README.md
@@ -296,9 +296,8 @@ Avro supports three different compression codecs when 
encoding data:
 
 To specify a codec to use to compress data, just specify it while creating a 
`Writer`:
 ```rust
-use apache_avro::Writer;
-use apache_avro::Codec;
-let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+use apache_avro::{Codec, DeflateSettings, Schema, Writer};
+let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 ```
 
 ## Reading data
@@ -394,7 +393,7 @@ The following is an example of how to combine everything 
showed so far and it is
 quick reference of the library interface:
 
 ```rust
-use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record, 
Error};
+use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value, 
types::Record, Error};
 use serde::{Deserialize, Serialize};
 
 #[derive(Debug, Deserialize, Serialize)]
@@ -419,7 +418,7 @@ fn main() -> Result<(), Error> {
 
     println!("{:?}", schema);
 
-    let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+    let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 
     let mut record = Record::new(writer.schema()).unwrap();
     record.put("a", 27i64);
@@ -459,7 +458,7 @@ Note that the on-disk representation is identical to the 
underlying primitive/co
 
 ```rust
 use apache_avro::{
-    types::Record, types::Value, Codec, Days, Decimal, Duration, Millis, 
Months, Reader, Schema,
+    types::Record, types::Value, Codec, Days, Decimal, DeflateSettings, 
Duration, Millis, Months, Reader, Schema,
     Writer, Error,
 };
 use num_bigint::ToBigInt;
@@ -545,7 +544,7 @@ fn main() -> Result<(), Error> {
 
     println!("{:?}", schema);
 
-    let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+    let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 
     let mut record = Record::new(writer.schema()).unwrap();
     record.put("decimal_fixed", 
Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
diff --git a/avro/src/codec.rs b/avro/src/codec.rs
index 599b59d..d616efa 100644
--- a/avro/src/codec.rs
+++ b/avro/src/codec.rs
@@ -17,10 +17,31 @@
 
 //! Logic for all supported compression codecs in Avro.
 use crate::{types::Value, AvroResult, Error};
-#[allow(unused_imports)] // may be flagged as unused when only DEFLATE is 
enabled
-use std::io::{Read, Write};
 use strum_macros::{EnumIter, EnumString, IntoStaticStr};
 
+/// Settings for the `Deflate` codec.
+#[derive(Clone, Copy, Eq, PartialEq, Debug)]
+pub struct DeflateSettings {
+    compression_level: miniz_oxide::deflate::CompressionLevel,
+}
+
+impl DeflateSettings {
+    pub fn new(compression_level: miniz_oxide::deflate::CompressionLevel) -> 
Self {
+        DeflateSettings { compression_level }
+    }
+
+    fn compression_level(&self) -> u8 {
+        self.compression_level as u8
+    }
+}
+
+impl Default for DeflateSettings {
+    /// Default compression level is 
`miniz_oxide::deflate::CompressionLevel::DefaultCompression`.
+    fn default() -> Self {
+        Self::new(miniz_oxide::deflate::CompressionLevel::DefaultCompression)
+    }
+}
+
 /// The compression codec used to compress blocks.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, EnumIter, EnumString, 
IntoStaticStr)]
 #[strum(serialize_all = "kebab_case")]
@@ -30,7 +51,7 @@ pub enum Codec {
     /// The `Deflate` codec writes the data block using the deflate algorithm
     /// as specified in RFC 1951, and typically implemented using the zlib 
library.
     /// Note that this format (unlike the "zlib format" in RFC 1950) does not 
have a checksum.
-    Deflate,
+    Deflate(DeflateSettings),
     #[cfg(feature = "snappy")]
     /// The `Snappy` codec uses Google's 
[Snappy](http://google.github.io/snappy/)
     /// compression library. Each compressed block is followed by the 4-byte, 
big-endian
@@ -60,8 +81,9 @@ impl Codec {
     pub fn compress(self, stream: &mut Vec<u8>) -> AvroResult<()> {
         match self {
             Codec::Null => (),
-            Codec::Deflate => {
-                let compressed = miniz_oxide::deflate::compress_to_vec(stream, 
6);
+            Codec::Deflate(settings) => {
+                let compressed =
+                    miniz_oxide::deflate::compress_to_vec(stream, 
settings.compression_level());
                 *stream = compressed;
             }
             #[cfg(feature = "snappy")]
@@ -83,6 +105,7 @@ impl Codec {
             }
             #[cfg(feature = "zstandard")]
             Codec::Zstandard(settings) => {
+                use std::io::Write;
                 let mut encoder =
                     zstd::Encoder::new(Vec::new(), settings.compression_level 
as i32).unwrap();
                 encoder.write_all(stream).map_err(Error::ZstdCompress)?;
@@ -91,6 +114,7 @@ impl Codec {
             #[cfg(feature = "bzip")]
             Codec::Bzip2(settings) => {
                 use bzip2::read::BzEncoder;
+                use std::io::Read;
 
                 let mut encoder = BzEncoder::new(&stream[..], 
settings.compression());
                 let mut buffer = Vec::new();
@@ -99,6 +123,7 @@ impl Codec {
             }
             #[cfg(feature = "xz")]
             Codec::Xz(settings) => {
+                use std::io::Read;
                 use xz2::read::XzEncoder;
 
                 let mut encoder = XzEncoder::new(&stream[..], 
settings.compression_level as u32);
@@ -115,7 +140,7 @@ impl Codec {
     pub fn decompress(self, stream: &mut Vec<u8>) -> AvroResult<()> {
         *stream = match self {
             Codec::Null => return Ok(()),
-            Codec::Deflate => 
miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| {
+            Codec::Deflate(_settings) => 
miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| {
                 let err = {
                     use miniz_oxide::inflate::TINFLStatus::*;
                     use std::io::{Error,ErrorKind};
@@ -168,6 +193,7 @@ impl Codec {
             #[cfg(feature = "bzip")]
             Codec::Bzip2(_) => {
                 use bzip2::read::BzDecoder;
+                use std::io::Read;
 
                 let mut decoder = BzDecoder::new(&stream[..]);
                 let mut decoded = Vec::new();
@@ -177,6 +203,7 @@ impl Codec {
             #[cfg(feature = "xz")]
             Codec::Xz(_) => {
                 use xz2::read::XzDecoder;
+                use std::io::Read;
 
                 let mut decoder = XzDecoder::new(&stream[..]);
                 let mut decoded: Vec<u8> = Vec::new();
@@ -258,6 +285,7 @@ pub mod xz {
 mod tests {
     use super::*;
     use apache_avro_test_helper::TestResult;
+    use miniz_oxide::deflate::CompressionLevel;
     use pretty_assertions::{assert_eq, assert_ne};
 
     const INPUT: &[u8] = 
b"theanswertolifetheuniverseandeverythingis42theanswertolifetheuniverseandeverythingis4theanswertolifetheuniverseandeverythingis2";
@@ -275,7 +303,9 @@ mod tests {
 
     #[test]
     fn deflate_compress_and_decompress() -> TestResult {
-        compress_and_decompress(Codec::Deflate)
+        compress_and_decompress(Codec::Deflate(DeflateSettings::new(
+            CompressionLevel::BestCompression,
+        )))
     }
 
     #[cfg(feature = "snappy")]
@@ -315,7 +345,10 @@ mod tests {
     #[test]
     fn codec_to_str() {
         assert_eq!(<&str>::from(Codec::Null), "null");
-        assert_eq!(<&str>::from(Codec::Deflate), "deflate");
+        assert_eq!(
+            <&str>::from(Codec::Deflate(DeflateSettings::default())),
+            "deflate"
+        );
 
         #[cfg(feature = "snappy")]
         assert_eq!(<&str>::from(Codec::Snappy), "snappy");
@@ -341,7 +374,10 @@ mod tests {
         use std::str::FromStr;
 
         assert_eq!(Codec::from_str("null").unwrap(), Codec::Null);
-        assert_eq!(Codec::from_str("deflate").unwrap(), Codec::Deflate);
+        assert_eq!(
+            Codec::from_str("deflate").unwrap(),
+            Codec::Deflate(DeflateSettings::default())
+        );
 
         #[cfg(feature = "snappy")]
         assert_eq!(Codec::from_str("snappy").unwrap(), Codec::Snappy);
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index 4b95bb5..247ab18 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -311,9 +311,7 @@
 //!
 //! To specify a codec to use to compress data, just specify it while creating 
a `Writer`:
 //! ```
-//! # use apache_avro::Schema;
-//! use apache_avro::Writer;
-//! use apache_avro::Codec;
+//! use apache_avro::{Codec, DeflateSettings, Schema, Writer};
 //! #
 //! # let raw_schema = r#"
 //! #     {
@@ -326,7 +324,7 @@
 //! #     }
 //! # "#;
 //! # let schema = Schema::parse_str(raw_schema).unwrap();
-//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+//! let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 //! ```
 //!
 //! # Reading data
@@ -507,7 +505,7 @@
 //! quick reference of the library interface:
 //!
 //! ```
-//! use apache_avro::{Codec, Reader, Schema, Writer, from_value, 
types::Record, Error};
+//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, 
from_value, types::Record, Error};
 //! use serde::{Deserialize, Serialize};
 //!
 //! #[derive(Debug, Deserialize, Serialize)]
@@ -532,7 +530,7 @@
 //!
 //!     println!("{:?}", schema);
 //!
-//!     let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate);
+//!     let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 //!
 //!     let mut record = Record::new(writer.schema()).unwrap();
 //!     record.put("a", 27i64);
@@ -572,7 +570,7 @@
 //!
 //! ```rust
 //! use apache_avro::{
-//!     types::Record, types::Value, Codec, Days, Decimal, Duration, Millis, 
Months, Reader, Schema,
+//!     types::Record, types::Value, Codec, Days, Decimal, DeflateSettings, 
Duration, Millis, Months, Reader, Schema,
 //!     Writer, Error,
 //! };
 //! use num_bigint::ToBigInt;
@@ -658,7 +656,7 @@
 //!
 //!     println!("{:?}", schema);
 //!
-//!     let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate);
+//!     let mut writer = Writer::with_codec(&schema, Vec::new(), 
Codec::Deflate(DeflateSettings::default()));
 //!
 //!     let mut record = Record::new(writer.schema()).unwrap();
 //!     record.put("decimal_fixed", 
Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
@@ -893,7 +891,7 @@ pub use codec::bzip::Bzip2Settings;
 pub use codec::xz::XzSettings;
 #[cfg(feature = "zstandard")]
 pub use codec::zstandard::ZstandardSettings;
-pub use codec::Codec;
+pub use codec::{Codec, DeflateSettings};
 pub use de::from_value;
 pub use decimal::Decimal;
 pub use duration::{Days, Duration, Millis, Months};
diff --git a/avro/src/writer.rs b/avro/src/writer.rs
index 1ff1339..ed41f0e 100644
--- a/avro/src/writer.rs
+++ b/avro/src/writer.rs
@@ -709,6 +709,7 @@ mod tests {
     use pretty_assertions::assert_eq;
     use serde::{Deserialize, Serialize};
 
+    use crate::codec::DeflateSettings;
     use apache_avro_test_helper::TestResult;
 
     const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len();
@@ -1065,14 +1066,18 @@ mod tests {
     }
 
     fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec<u8>> {
-        Writer::with_codec(schema, Vec::new(), Codec::Deflate)
+        Writer::with_codec(
+            schema,
+            Vec::new(),
+            Codec::Deflate(DeflateSettings::default()),
+        )
     }
 
     fn make_writer_with_builder(schema: &Schema) -> Writer<'_, Vec<u8>> {
         Writer::builder()
             .writer(Vec::new())
             .schema(schema)
-            .codec(Codec::Deflate)
+            .codec(Codec::Deflate(DeflateSettings::default()))
             .block_size(100)
             .build()
     }
@@ -1094,7 +1099,7 @@ mod tests {
         zig_i64(3, &mut data)?;
         data.extend(b"foo");
         data.extend(data.clone());
-        Codec::Deflate.compress(&mut data)?;
+        Codec::Deflate(DeflateSettings::default()).compress(&mut data)?;
 
         // starts with magic
         assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER);
@@ -1142,7 +1147,7 @@ mod tests {
           ]
         }
         "#;
-        let codec = Codec::Deflate;
+        let codec = Codec::Deflate(DeflateSettings::default());
         let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?;
         let mut writer = Writer::builder()
             .schema(&schema)
diff --git a/avro/tests/codecs.rs b/avro/tests/codecs.rs
index 5017d33..02137e9 100644
--- a/avro/tests/codecs.rs
+++ b/avro/tests/codecs.rs
@@ -17,9 +17,10 @@
 
 use apache_avro::{
     types::{Record, Value},
-    Codec, Reader, Schema, Writer,
+    Codec, DeflateSettings, Reader, Schema, Writer,
 };
 use apache_avro_test_helper::TestResult;
+use miniz_oxide::deflate::CompressionLevel;
 
 #[test]
 fn avro_4032_null_codec_settings() -> TestResult {
@@ -27,7 +28,9 @@ fn avro_4032_null_codec_settings() -> TestResult {
 }
 #[test]
 fn avro_4032_deflate_codec_settings() -> TestResult {
-    avro_4032_codec_settings(Codec::Deflate)
+    avro_4032_codec_settings(Codec::Deflate(DeflateSettings::new(
+        CompressionLevel::UberCompression,
+    )))
 }
 
 #[test]

Reply via email to