This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 68a4f2e feat: Add settings for Codec::Deflate (#174)
68a4f2e is described below
commit 68a4f2e871fdbc918ec8ae3de83a6be93d77e932
Author: Martin Grigorov <[email protected]>
AuthorDate: Wed Apr 9 08:26:19 2025 +0300
feat: Add settings for Codec::Deflate (#174)
* Issue #169 - Be explicit about the used features of miniz_oxide
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #169 - Do not allow unused imports. Import only what is needed when
it is needed
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Fixes #173 - Introduce DeflateSettings
At the moment it can be used to specify the compression level for
Codec::Deflate
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #173 - Use non-default compression level for Codec::Deflate
Add some Rustdoc
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #173 - Update the README.md with `cargo rdme`
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #173 - Fix Rustdoc imports
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #173 - Introduce a helper method for the cast of CompressionLevel
to u8
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
* Issue #173 - Further rustdoc import fixes
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---------
Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
---
avro/Cargo.toml | 2 +-
avro/README.md | 13 ++++++-------
avro/src/codec.rs | 54 +++++++++++++++++++++++++++++++++++++++++++---------
avro/src/lib.rs | 16 +++++++---------
avro/src/writer.rs | 13 +++++++++----
avro/tests/codecs.rs | 7 +++++--
6 files changed, 73 insertions(+), 32 deletions(-)
diff --git a/avro/Cargo.toml b/avro/Cargo.toml
index 4064817..eae7c0b 100644
--- a/avro/Cargo.toml
+++ b/avro/Cargo.toml
@@ -59,7 +59,7 @@ bon = { default-features = false, version = "3.5.1" }
bzip2 = { version = "0.5.2", optional = true }
crc32fast = { default-features = false, version = "1.4.2", optional = true }
digest = { default-features = false, version = "0.10.7", features =
["core-api"] }
-miniz_oxide = "0.8.7"
+miniz_oxide = { default-features = false, version = "0.8.7", features =
["with-alloc"] }
log = { workspace = true }
num-bigint = { default-features = false, version = "0.4.6", features = ["std",
"serde"] }
regex-lite = { default-features = false, version = "0.1.6", features = ["std",
"string"] }
diff --git a/avro/README.md b/avro/README.md
index 7598a75..a84e556 100644
--- a/avro/README.md
+++ b/avro/README.md
@@ -296,9 +296,8 @@ Avro supports three different compression codecs when
encoding data:
To specify a codec to use to compress data, just specify it while creating a
`Writer`:
```rust
-use apache_avro::Writer;
-use apache_avro::Codec;
-let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+use apache_avro::{Codec, DeflateSettings, Schema, Writer};
+let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
```
## Reading data
@@ -394,7 +393,7 @@ The following is an example of how to combine everything
showed so far and it is
quick reference of the library interface:
```rust
-use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record,
Error};
+use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer, from_value,
types::Record, Error};
use serde::{Deserialize, Serialize};
#[derive(Debug, Deserialize, Serialize)]
@@ -419,7 +418,7 @@ fn main() -> Result<(), Error> {
println!("{:?}", schema);
- let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+ let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
let mut record = Record::new(writer.schema()).unwrap();
record.put("a", 27i64);
@@ -459,7 +458,7 @@ Note that the on-disk representation is identical to the
underlying primitive/co
```rust
use apache_avro::{
- types::Record, types::Value, Codec, Days, Decimal, Duration, Millis,
Months, Reader, Schema,
+ types::Record, types::Value, Codec, Days, Decimal, DeflateSettings,
Duration, Millis, Months, Reader, Schema,
Writer, Error,
};
use num_bigint::ToBigInt;
@@ -545,7 +544,7 @@ fn main() -> Result<(), Error> {
println!("{:?}", schema);
- let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+ let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
let mut record = Record::new(writer.schema()).unwrap();
record.put("decimal_fixed",
Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
diff --git a/avro/src/codec.rs b/avro/src/codec.rs
index 599b59d..d616efa 100644
--- a/avro/src/codec.rs
+++ b/avro/src/codec.rs
@@ -17,10 +17,31 @@
//! Logic for all supported compression codecs in Avro.
use crate::{types::Value, AvroResult, Error};
-#[allow(unused_imports)] // may be flagged as unused when only DEFLATE is
enabled
-use std::io::{Read, Write};
use strum_macros::{EnumIter, EnumString, IntoStaticStr};
+/// Settings for the `Deflate` codec.
+#[derive(Clone, Copy, Eq, PartialEq, Debug)]
+pub struct DeflateSettings {
+ compression_level: miniz_oxide::deflate::CompressionLevel,
+}
+
+impl DeflateSettings {
+ pub fn new(compression_level: miniz_oxide::deflate::CompressionLevel) ->
Self {
+ DeflateSettings { compression_level }
+ }
+
+ fn compression_level(&self) -> u8 {
+ self.compression_level as u8
+ }
+}
+
+impl Default for DeflateSettings {
+ /// Default compression level is
`miniz_oxide::deflate::CompressionLevel::DefaultCompression`.
+ fn default() -> Self {
+ Self::new(miniz_oxide::deflate::CompressionLevel::DefaultCompression)
+ }
+}
+
/// The compression codec used to compress blocks.
#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumIter, EnumString,
IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
@@ -30,7 +51,7 @@ pub enum Codec {
/// The `Deflate` codec writes the data block using the deflate algorithm
/// as specified in RFC 1951, and typically implemented using the zlib
library.
/// Note that this format (unlike the "zlib format" in RFC 1950) does not
have a checksum.
- Deflate,
+ Deflate(DeflateSettings),
#[cfg(feature = "snappy")]
/// The `Snappy` codec uses Google's
[Snappy](http://google.github.io/snappy/)
/// compression library. Each compressed block is followed by the 4-byte,
big-endian
@@ -60,8 +81,9 @@ impl Codec {
pub fn compress(self, stream: &mut Vec<u8>) -> AvroResult<()> {
match self {
Codec::Null => (),
- Codec::Deflate => {
- let compressed = miniz_oxide::deflate::compress_to_vec(stream,
6);
+ Codec::Deflate(settings) => {
+ let compressed =
+ miniz_oxide::deflate::compress_to_vec(stream,
settings.compression_level());
*stream = compressed;
}
#[cfg(feature = "snappy")]
@@ -83,6 +105,7 @@ impl Codec {
}
#[cfg(feature = "zstandard")]
Codec::Zstandard(settings) => {
+ use std::io::Write;
let mut encoder =
zstd::Encoder::new(Vec::new(), settings.compression_level
as i32).unwrap();
encoder.write_all(stream).map_err(Error::ZstdCompress)?;
@@ -91,6 +114,7 @@ impl Codec {
#[cfg(feature = "bzip")]
Codec::Bzip2(settings) => {
use bzip2::read::BzEncoder;
+ use std::io::Read;
let mut encoder = BzEncoder::new(&stream[..],
settings.compression());
let mut buffer = Vec::new();
@@ -99,6 +123,7 @@ impl Codec {
}
#[cfg(feature = "xz")]
Codec::Xz(settings) => {
+ use std::io::Read;
use xz2::read::XzEncoder;
let mut encoder = XzEncoder::new(&stream[..],
settings.compression_level as u32);
@@ -115,7 +140,7 @@ impl Codec {
pub fn decompress(self, stream: &mut Vec<u8>) -> AvroResult<()> {
*stream = match self {
Codec::Null => return Ok(()),
- Codec::Deflate =>
miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| {
+ Codec::Deflate(_settings) =>
miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| {
let err = {
use miniz_oxide::inflate::TINFLStatus::*;
use std::io::{Error,ErrorKind};
@@ -168,6 +193,7 @@ impl Codec {
#[cfg(feature = "bzip")]
Codec::Bzip2(_) => {
use bzip2::read::BzDecoder;
+ use std::io::Read;
let mut decoder = BzDecoder::new(&stream[..]);
let mut decoded = Vec::new();
@@ -177,6 +203,7 @@ impl Codec {
#[cfg(feature = "xz")]
Codec::Xz(_) => {
use xz2::read::XzDecoder;
+ use std::io::Read;
let mut decoder = XzDecoder::new(&stream[..]);
let mut decoded: Vec<u8> = Vec::new();
@@ -258,6 +285,7 @@ pub mod xz {
mod tests {
use super::*;
use apache_avro_test_helper::TestResult;
+ use miniz_oxide::deflate::CompressionLevel;
use pretty_assertions::{assert_eq, assert_ne};
const INPUT: &[u8] =
b"theanswertolifetheuniverseandeverythingis42theanswertolifetheuniverseandeverythingis4theanswertolifetheuniverseandeverythingis2";
@@ -275,7 +303,9 @@ mod tests {
#[test]
fn deflate_compress_and_decompress() -> TestResult {
- compress_and_decompress(Codec::Deflate)
+ compress_and_decompress(Codec::Deflate(DeflateSettings::new(
+ CompressionLevel::BestCompression,
+ )))
}
#[cfg(feature = "snappy")]
@@ -315,7 +345,10 @@ mod tests {
#[test]
fn codec_to_str() {
assert_eq!(<&str>::from(Codec::Null), "null");
- assert_eq!(<&str>::from(Codec::Deflate), "deflate");
+ assert_eq!(
+ <&str>::from(Codec::Deflate(DeflateSettings::default())),
+ "deflate"
+ );
#[cfg(feature = "snappy")]
assert_eq!(<&str>::from(Codec::Snappy), "snappy");
@@ -341,7 +374,10 @@ mod tests {
use std::str::FromStr;
assert_eq!(Codec::from_str("null").unwrap(), Codec::Null);
- assert_eq!(Codec::from_str("deflate").unwrap(), Codec::Deflate);
+ assert_eq!(
+ Codec::from_str("deflate").unwrap(),
+ Codec::Deflate(DeflateSettings::default())
+ );
#[cfg(feature = "snappy")]
assert_eq!(Codec::from_str("snappy").unwrap(), Codec::Snappy);
diff --git a/avro/src/lib.rs b/avro/src/lib.rs
index 4b95bb5..247ab18 100644
--- a/avro/src/lib.rs
+++ b/avro/src/lib.rs
@@ -311,9 +311,7 @@
//!
//! To specify a codec to use to compress data, just specify it while creating
a `Writer`:
//! ```
-//! # use apache_avro::Schema;
-//! use apache_avro::Writer;
-//! use apache_avro::Codec;
+//! use apache_avro::{Codec, DeflateSettings, Schema, Writer};
//! #
//! # let raw_schema = r#"
//! # {
@@ -326,7 +324,7 @@
//! # }
//! # "#;
//! # let schema = Schema::parse_str(raw_schema).unwrap();
-//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate);
+//! let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
//! ```
//!
//! # Reading data
@@ -507,7 +505,7 @@
//! quick reference of the library interface:
//!
//! ```
-//! use apache_avro::{Codec, Reader, Schema, Writer, from_value,
types::Record, Error};
+//! use apache_avro::{Codec, DeflateSettings, Reader, Schema, Writer,
from_value, types::Record, Error};
//! use serde::{Deserialize, Serialize};
//!
//! #[derive(Debug, Deserialize, Serialize)]
@@ -532,7 +530,7 @@
//!
//! println!("{:?}", schema);
//!
-//! let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate);
+//! let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
//!
//! let mut record = Record::new(writer.schema()).unwrap();
//! record.put("a", 27i64);
@@ -572,7 +570,7 @@
//!
//! ```rust
//! use apache_avro::{
-//! types::Record, types::Value, Codec, Days, Decimal, Duration, Millis,
Months, Reader, Schema,
+//! types::Record, types::Value, Codec, Days, Decimal, DeflateSettings,
Duration, Millis, Months, Reader, Schema,
//! Writer, Error,
//! };
//! use num_bigint::ToBigInt;
@@ -658,7 +656,7 @@
//!
//! println!("{:?}", schema);
//!
-//! let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate);
+//! let mut writer = Writer::with_codec(&schema, Vec::new(),
Codec::Deflate(DeflateSettings::default()));
//!
//! let mut record = Record::new(writer.schema()).unwrap();
//! record.put("decimal_fixed",
Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be()));
@@ -893,7 +891,7 @@ pub use codec::bzip::Bzip2Settings;
pub use codec::xz::XzSettings;
#[cfg(feature = "zstandard")]
pub use codec::zstandard::ZstandardSettings;
-pub use codec::Codec;
+pub use codec::{Codec, DeflateSettings};
pub use de::from_value;
pub use decimal::Decimal;
pub use duration::{Days, Duration, Millis, Months};
diff --git a/avro/src/writer.rs b/avro/src/writer.rs
index 1ff1339..ed41f0e 100644
--- a/avro/src/writer.rs
+++ b/avro/src/writer.rs
@@ -709,6 +709,7 @@ mod tests {
use pretty_assertions::assert_eq;
use serde::{Deserialize, Serialize};
+ use crate::codec::DeflateSettings;
use apache_avro_test_helper::TestResult;
const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len();
@@ -1065,14 +1066,18 @@ mod tests {
}
fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec<u8>> {
- Writer::with_codec(schema, Vec::new(), Codec::Deflate)
+ Writer::with_codec(
+ schema,
+ Vec::new(),
+ Codec::Deflate(DeflateSettings::default()),
+ )
}
fn make_writer_with_builder(schema: &Schema) -> Writer<'_, Vec<u8>> {
Writer::builder()
.writer(Vec::new())
.schema(schema)
- .codec(Codec::Deflate)
+ .codec(Codec::Deflate(DeflateSettings::default()))
.block_size(100)
.build()
}
@@ -1094,7 +1099,7 @@ mod tests {
zig_i64(3, &mut data)?;
data.extend(b"foo");
data.extend(data.clone());
- Codec::Deflate.compress(&mut data)?;
+ Codec::Deflate(DeflateSettings::default()).compress(&mut data)?;
// starts with magic
assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER);
@@ -1142,7 +1147,7 @@ mod tests {
]
}
"#;
- let codec = Codec::Deflate;
+ let codec = Codec::Deflate(DeflateSettings::default());
let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?;
let mut writer = Writer::builder()
.schema(&schema)
diff --git a/avro/tests/codecs.rs b/avro/tests/codecs.rs
index 5017d33..02137e9 100644
--- a/avro/tests/codecs.rs
+++ b/avro/tests/codecs.rs
@@ -17,9 +17,10 @@
use apache_avro::{
types::{Record, Value},
- Codec, Reader, Schema, Writer,
+ Codec, DeflateSettings, Reader, Schema, Writer,
};
use apache_avro_test_helper::TestResult;
+use miniz_oxide::deflate::CompressionLevel;
#[test]
fn avro_4032_null_codec_settings() -> TestResult {
@@ -27,7 +28,9 @@ fn avro_4032_null_codec_settings() -> TestResult {
}
#[test]
fn avro_4032_deflate_codec_settings() -> TestResult {
- avro_4032_codec_settings(Codec::Deflate)
+ avro_4032_codec_settings(Codec::Deflate(DeflateSettings::new(
+ CompressionLevel::UberCompression,
+ )))
}
#[test]