1fanwang commented on code in PR #2438:
URL: https://github.com/apache/iceberg-rust/pull/2438#discussion_r3407571329
##########
crates/iceberg/src/compression.rs:
##########
@@ -220,25 +234,46 @@ mod tests {
}
}
+ #[tokio::test]
+ async fn test_compression_codec_lz4_roundtrip() {
+ let codec = CompressionCodec::Lz4;
+
+ // Empty input must round-trip cleanly.
+ let empty: Vec<u8> = vec![];
+ let compressed_empty = codec.compress(empty.clone()).unwrap();
+ assert_eq!(codec.decompress(compressed_empty).unwrap(), empty);
+
+ // Mixed-byte payload (less compressible than all-zeros) round-trips.
+ let payload: Vec<u8> = (0..10_000).map(|i| (i % 251) as u8).collect();
+ let compressed = codec.compress(payload.clone()).unwrap();
+ assert_eq!(codec.decompress(compressed).unwrap(), payload);
+
+ // Frame must begin with the LZ4 frame magic number 0x184D2204
(little-endian)
+ // per https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md.
+ let highly_compressible = vec![0u8; 10_000];
+ let compressed = codec.compress(highly_compressible).unwrap();
+ assert_eq!(&compressed[..4], &[0x04, 0x22, 0x4D, 0x18]);
+ }
+
#[tokio::test]
async fn test_compression_codec_unsupported() {
Review Comment:
Renamed to `test_snappy_compression_is_unsupported`, keeping the `test_`
prefix to match the module. d798ed04
##########
crates/iceberg/src/compression.rs:
##########
@@ -220,25 +234,46 @@ mod tests {
}
}
+ #[tokio::test]
+ async fn test_compression_codec_lz4_roundtrip() {
Review Comment:
Folded the empty and less-compressible round-trips into
`test_compression_codec_compress` so Zstd and Gzip get the same coverage; kept
only the LZ4 magic-number check as `test_lz4_frame_magic_number`. d798ed04
##########
crates/iceberg/src/puffin/writer.rs:
##########
@@ -273,13 +273,65 @@ mod tests {
let blobs = vec![blob_0(), blob_1()];
let blobs_with_compression = blobs_with_compression(blobs.clone(),
CompressionCodec::Lz4);
- assert_eq!(
- write_puffin_file(&temp_dir, blobs_with_compression,
file_properties())
- .await
- .unwrap_err()
- .to_string(),
- "FeatureUnsupported => LZ4 compression is not supported currently"
- );
+ let input_file = write_puffin_file(&temp_dir, blobs_with_compression,
file_properties())
+ .await
+ .unwrap()
+ .to_input_file();
+
+ // Blob round-trip must yield the original bytes after LZ4 framed
decompression.
+ assert_eq!(read_all_blobs_from_puffin_file(input_file).await, blobs);
+ }
+
+ /// Regression for https://github.com/apache/iceberg-rust/issues/2419 —
+ /// the PuffinWriter previously claimed LZ4 compression on the footer (by
setting
+ /// the FooterPayloadCompressed flag) but wrote the raw uncompressed JSON,
which
+ /// produced unreadable files. The writer now actually LZ4-encodes the
footer when
+ /// compress_footer=true, and the reader round-trips it.
Review Comment:
Confirmed — and the before/after block in the PR description shows it:
reverting the two `Lz4` arms makes `test_compress_empty_footer_lz4_succeeds`
panic at `close()` with `FeatureUnsupported`, so pre-fix it failed outright
rather than writing raw JSON. Since LZ4 footer compression never worked, these
are feature tests, not regressions. Reworded the doc. d798ed04
##########
crates/iceberg/src/puffin/writer.rs:
##########
@@ -273,13 +273,65 @@ mod tests {
let blobs = vec![blob_0(), blob_1()];
let blobs_with_compression = blobs_with_compression(blobs.clone(),
CompressionCodec::Lz4);
- assert_eq!(
- write_puffin_file(&temp_dir, blobs_with_compression,
file_properties())
- .await
- .unwrap_err()
- .to_string(),
- "FeatureUnsupported => LZ4 compression is not supported currently"
- );
+ let input_file = write_puffin_file(&temp_dir, blobs_with_compression,
file_properties())
+ .await
+ .unwrap()
+ .to_input_file();
+
+ // Blob round-trip must yield the original bytes after LZ4 framed
decompression.
+ assert_eq!(read_all_blobs_from_puffin_file(input_file).await, blobs);
+ }
+
+ /// Regression for https://github.com/apache/iceberg-rust/issues/2419 —
+ /// the PuffinWriter previously claimed LZ4 compression on the footer (by
setting
+ /// the FooterPayloadCompressed flag) but wrote the raw uncompressed JSON,
which
+ /// produced unreadable files. The writer now actually LZ4-encodes the
footer when
+ /// compress_footer=true, and the reader round-trips it.
+ #[tokio::test]
+ async fn test_compress_footer_lz4_round_trips() {
+ let temp_dir = TempDir::new().unwrap();
+ let file_io = FileIO::new_with_fs();
+ let path = temp_dir.path().join("compressed_footer.bin");
+ let output_file = file_io.new_output(path.to_str().unwrap()).unwrap();
+
+ // compress_footer=true sets the footer codec to LZ4.
+ let mut writer = PuffinWriter::new(&output_file, file_properties(),
true)
+ .await
+ .unwrap();
+ writer.add(blob_0(), CompressionCodec::None).await.unwrap();
+ writer.close().await.unwrap();
+
+ // Reader must be able to LZ4-decompress the footer and recover both
the
+ // file metadata and the blob payload.
+ let input_file = output_file.to_input_file();
+ let metadata = FileMetadata::read(&input_file).await.unwrap();
+ assert_eq!(metadata.properties, file_properties());
+ assert_eq!(metadata.blobs.len(), 1);
+ assert_eq!(read_all_blobs_from_puffin_file(input_file).await, vec![
+ blob_0()
+ ]);
+ }
+
+ /// Direct adaptation of the reproducer from
+ /// https://github.com/apache/iceberg-rust/issues/2419 — close must
succeed when
Review Comment:
Dropped the issue links from both footer test docs; the names carry the
intent. d798ed04
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]