CTTY commented on code in PR #2288:
URL: https://github.com/apache/iceberg-rust/pull/2288#discussion_r3003339106
##########
crates/iceberg/src/compression.rs:
##########
@@ -17,28 +17,73 @@
//! Compression codec support for data compression and decompression.
+use std::fmt;
use std::io::{Read, Write};
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::{Error, ErrorKind, Result};
/// Data compression formats
-#[derive(Debug, PartialEq, Eq, Clone, Copy, Default, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub enum CompressionCodec {
#[default]
/// No compression
None,
/// LZ4 single compression frame with content size present
Lz4,
- /// Zstandard single compression frame with content size present
- Zstd,
- /// Gzip compression
- Gzip,
+ /// Zstandard single compression frame with content size present. Optional
level 0–22.
Review Comment:
nit: We should mention that level 0 for zstd means the default compression
level, not "no compression" as it does for Gzip.
##########
crates/iceberg/src/spec/table_properties.rs:
##########
@@ -85,13 +85,15 @@ pub(crate) fn parse_metadata_file_compression(
// Validate that only None and Gzip are used for metadata
match codec {
- CompressionCodec::None | CompressionCodec::Gzip => Ok(codec),
- CompressionCodec::Lz4 | CompressionCodec::Zstd => Err(Error::new(
- ErrorKind::DataInvalid,
- format!(
- "Invalid metadata compression codec: {value}. Only 'none' and
'gzip' are supported for metadata files."
- ),
- )),
+ CompressionCodec::None | CompressionCodec::Gzip(_) => Ok(codec),
+ CompressionCodec::Lz4 | CompressionCodec::Zstd(_) |
CompressionCodec::Snappy => {
Review Comment:
nit: since we only support `None` and `Gzip`, this can be `_ =>` to fail all
other cases
##########
crates/iceberg/src/compression.rs:
##########
@@ -66,19 +115,24 @@ impl CompressionCodec {
ErrorKind::FeatureUnsupported,
"LZ4 compression is not supported currently",
)),
- CompressionCodec::Zstd => {
+ CompressionCodec::Zstd(level) => {
let writer = Vec::<u8>::new();
- let mut encoder = zstd::stream::Encoder::new(writer, 3)?;
+ let mut encoder = zstd::stream::Encoder::new(writer,
level.unwrap_or(3) as i32)?;
encoder.include_checksum(true)?;
encoder.set_pledged_src_size(Some(bytes.len().try_into()?))?;
std::io::copy(&mut &bytes[..], &mut encoder)?;
Ok(encoder.finish()?)
}
- CompressionCodec::Gzip => {
- let mut encoder = GzEncoder::new(Vec::new(),
Compression::default());
+ CompressionCodec::Gzip(level) => {
+ let compression = Compression::new(level.unwrap_or(6).min(9)
as u32);
Review Comment:
We can just call `Compression::default()` when `level` is `None`
##########
crates/iceberg/src/compression.rs:
##########
@@ -49,13 +94,17 @@ impl CompressionCodec {
ErrorKind::FeatureUnsupported,
"LZ4 decompression is not supported currently",
)),
- CompressionCodec::Zstd =>
Ok(zstd::stream::decode_all(&bytes[..])?),
- CompressionCodec::Gzip => {
+ CompressionCodec::Zstd(_) =>
Ok(zstd::stream::decode_all(&bytes[..])?),
+ CompressionCodec::Gzip(_) => {
let mut decoder = GzDecoder::new(&bytes[..]);
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed)?;
Ok(decompressed)
}
+ CompressionCodec::Snappy => Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Snappy decompression is not supported currently",
+ )),
Review Comment:
Do we plan to address this in a follow-up PR? If so, could you create a
tracking issue?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]