This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f13800d Use miniz_oxide - a better DEFLATE backend (#169)
f13800d is described below
commit f13800dbe78d25021e249e1fb24f111dc601ca3f
Author: Sergey "Shnatsel" Davidoff <[email protected]>
AuthorDate: Tue Apr 1 08:46:07 2025 +0100
Use miniz_oxide - a better DEFLATE backend (#169)
* WIP: use a better DEFLATE library
* Map all possible errors from miniz_oxide to std::io::Error
* Suppress unused imports warning in conditional compilation
* Add a TODO to remove a now-unused error variant
* Address clippy warnings
* Add a deprecation warning, as requested
* deprecate since v0.19 instead
Co-authored-by: Martin Grigorov <[email protected]>
* Do not panic on theoretically unreachable errors in miniz_oxide error handling
---------
Co-authored-by: Martin Grigorov <[email protected]>
---
Cargo.lock | 85 ++-----------------------------------------------------
avro/Cargo.toml | 2 +-
avro/src/codec.rs | 35 ++++++++++++-----------
avro/src/error.rs | 2 ++
4 files changed, 25 insertions(+), 99 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index dc05141..dc8ad9c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -17,24 +17,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
-[[package]]
-name = "adler32"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
-
-[[package]]
-name = "ahash"
-version = "0.8.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
-dependencies = [
- "cfg-if",
- "once_cell",
- "version_check",
- "zerocopy 0.7.35",
-]
-
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -44,12 +26,6 @@ dependencies = [
"memchr",
]
-[[package]]
-name = "allocator-api2"
-version = "0.2.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
-
[[package]]
name = "anes"
version = "0.1.6"
@@ -82,9 +58,9 @@ dependencies = [
"criterion",
"digest",
"hex-literal",
- "libflate",
"log",
"md-5",
+ "miniz_oxide",
"num-bigint",
"paste",
"pretty_assertions",
@@ -345,15 +321,6 @@ dependencies = [
"wasm-bindgen",
]
-[[package]]
-name = "core2"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
-dependencies = [
- "memchr",
-]
-
[[package]]
name = "cpufeatures"
version = "0.2.16"
@@ -473,12 +440,6 @@ dependencies = [
"syn",
]
-[[package]]
-name = "dary_heap"
-version = "0.3.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
-
[[package]]
name = "diff"
version = "0.1.13"
@@ -679,16 +640,6 @@ dependencies = [
"crunchy",
]
-[[package]]
-name = "hashbrown"
-version = "0.14.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
-dependencies = [
- "ahash",
- "allocator-api2",
-]
-
[[package]]
name = "heck"
version = "0.5.0"
@@ -781,30 +732,6 @@ version = "0.2.168"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d"
-[[package]]
-name = "libflate"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e"
-dependencies = [
- "adler32",
- "core2",
- "crc32fast",
- "dary_heap",
- "libflate_lz77",
-]
-
-[[package]]
-name = "libflate_lz77"
-version = "2.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d"
-dependencies = [
- "core2",
- "hashbrown",
- "rle-decode-fast",
-]
-
[[package]]
name = "libm"
version = "0.2.11"
@@ -866,9 +793,9 @@ dependencies = [
[[package]]
name = "miniz_oxide"
-version = "0.8.0"
+version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
+checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [
"adler2",
]
@@ -1159,12 +1086,6 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
-[[package]]
-name = "rle-decode-fast"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
-
[[package]]
name = "rstest"
version = "0.25.0"
diff --git a/avro/Cargo.toml b/avro/Cargo.toml
index 6e04087..21b6ea1 100644
--- a/avro/Cargo.toml
+++ b/avro/Cargo.toml
@@ -59,7 +59,7 @@ bon = { default-features = false, version = "3.5.1" }
bzip2 = { version = "0.5.2", optional = true }
crc32fast = { default-features = false, version = "1.4.2", optional = true }
digest = { default-features = false, version = "0.10.7", features = ["core-api"] }
-libflate = { default-features = false, version = "2.1.0", features = ["std"] }
+miniz_oxide = "0.8.5"
log = { workspace = true }
num-bigint = { default-features = false, version = "0.4.6", features = ["std", "serde"] }
regex-lite = { default-features = false, version = "0.1.6", features = ["std", "string"] }
diff --git a/avro/src/codec.rs b/avro/src/codec.rs
index 9ec9d88..599b59d 100644
--- a/avro/src/codec.rs
+++ b/avro/src/codec.rs
@@ -17,7 +17,7 @@
//! Logic for all supported compression codecs in Avro.
use crate::{types::Value, AvroResult, Error};
-use libflate::deflate::{Decoder, Encoder};
+#[allow(unused_imports)] // may be flagged as unused when only DEFLATE is enabled
use std::io::{Read, Write};
use strum_macros::{EnumIter, EnumString, IntoStaticStr};
@@ -61,13 +61,8 @@ impl Codec {
match self {
Codec::Null => (),
Codec::Deflate => {
- let mut encoder = Encoder::new(Vec::new());
- encoder.write_all(stream).map_err(Error::DeflateCompress)?;
- // Deflate errors seem to just be io::Error
- *stream = encoder
- .finish()
- .into_result()
- .map_err(Error::DeflateCompressFinish)?;
+ let compressed = miniz_oxide::deflate::compress_to_vec(stream, 6);
+ *stream = compressed;
}
#[cfg(feature = "snappy")]
Codec::Snappy => {
@@ -120,14 +115,22 @@ impl Codec {
pub fn decompress(self, stream: &mut Vec<u8>) -> AvroResult<()> {
*stream = match self {
Codec::Null => return Ok(()),
- Codec::Deflate => {
- let mut decoded = Vec::new();
- let mut decoder = Decoder::new(&stream[..]);
- decoder
- .read_to_end(&mut decoded)
- .map_err(Error::DeflateDecompress)?;
- decoded
- }
+ Codec::Deflate => miniz_oxide::inflate::decompress_to_vec(stream).map_err(|e| {
+ let err = {
+ use miniz_oxide::inflate::TINFLStatus::*;
+ use std::io::{Error,ErrorKind};
+ match e.status {
+ FailedCannotMakeProgress => Error::from(ErrorKind::UnexpectedEof),
+ BadParam => Error::other("Unexpected error: miniz_oxide reported invalid output buffer size. Please report this to avro-rs developers."), // not possible for _to_vec()
+ Adler32Mismatch => Error::from(ErrorKind::InvalidData),
+ Failed => Error::from(ErrorKind::InvalidData),
+ Done => Error::other("Unexpected error: miniz_oxide reported an error with a success status. Please report this to avro-rs developers."),
+ NeedsMoreInput => Error::from(ErrorKind::UnexpectedEof),
+ HasMoreOutput => Error::other("Unexpected error: miniz_oxide has more data than the output buffer can hold. Please report this to avro-rs developers."), // not possible for _to_vec()
+ }
+ };
+ Error::DeflateDecompress(err)
+ })?,
#[cfg(feature = "snappy")]
Codec::Snappy => {
let decompressed_size = snap::raw::decompress_len(&stream[..stream.len() - 4])
diff --git a/avro/src/error.rs b/avro/src/error.rs
index 8cd6213..237995f 100644
--- a/avro/src/error.rs
+++ b/avro/src/error.rs
@@ -377,6 +377,8 @@ pub enum Error {
#[error("Failed to compress with flate: {0}")]
DeflateCompress(#[source] std::io::Error),
+ // no longer possible after migration from libflate to miniz_oxide
+ #[deprecated(since = "0.19.0", note = "This error can no longer occur")]
#[error("Failed to finish flate compressor: {0}")]
DeflateCompressFinish(#[source] std::io::Error),