Repository: commons-compress Updated Branches: refs/heads/master a5f7089f6 -> a5720927e
COMPRESS-271 implement block dependency when writing framed lz4 Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a5720927 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a5720927 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a5720927 Branch: refs/heads/master Commit: a5720927e1d76cb920d126d8bfecd9695daed09c Parents: a5f7089 Author: Stefan Bodewig <bode...@apache.org> Authored: Tue Feb 7 21:00:17 2017 +0100 Committer: Stefan Bodewig <bode...@apache.org> Committed: Tue Feb 7 21:00:17 2017 +0100 ---------------------------------------------------------------------- .../lz4/BlockLZ4CompressorOutputStream.java | 17 ++++++ .../lz4/FramedLZ4CompressorOutputStream.java | 59 +++++++++++++++----- .../lz4/FramedLZ4CompressorRoundtripTest.java | 9 ++- 3 files changed, 70 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java index 5c01f83..f7ba28d 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java @@ -145,6 +145,23 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { } } + /** + * Adds some initial data to fill the window with. + * + * @param data the data to fill the window with. + * @param off offset of real data into the array + * @param len amount of data + * @throws IllegalStateException if the stream has already started to write data + * @see LZ77Compressor#prefill + */ + public void prefill(byte[] data, int off, int len) { + if (len > 0) { + byte[] b = Arrays.copyOfRange(data, off, off + len); + compressor.prefill(b); + recordLiteral(b); + } + } + private void addLiteralBlock(LZ77Compressor.LiteralBlock block) throws IOException { Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength()); recordLiteral(last.addLiteral(block)); http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java index fc599d3..f2c0e54 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java @@ -35,11 +35,6 @@ import org.apache.commons.compress.utils.ByteUtils; * @NotThreadSafe */ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { - /* - * TODO before releasing 1.14: - * - * + block dependence - */ private static final byte[] END_MARK = new byte[4]; @@ -57,6 +52,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { // used for block checksum, if requested private final XXHash32 blockHash; + // only created if the config requires block dependency + private byte[] blockDependencyBuffer; + private int collectedBlockDependencyBytes; + /** * The block sizes supported by the format. */ @@ -88,7 +87,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { */ public static class Parameters { private final BlockSize blockSize; - private final boolean withContentChecksum, withBlockChecksum; + private final boolean withContentChecksum, withBlockChecksum, withBlockDependency; /** * The default parameters of 4M block size, enabled content @@ -96,7 +95,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { * * <p>This matches the defaults of the lz4 command line utility.</p> */ - public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false); + public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false); /** * Sets up custom a custom block size for the LZ4 stream but @@ -105,7 +104,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { * @param blockSize the size of a single block. */ public Parameters(BlockSize blockSize) { - this(blockSize, true, false); + this(blockSize, true, false, false); } /** * Sets up custom parameters for the LZ4 stream. @@ -114,17 +113,23 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { * @param withBlockChecksum whether to write a block checksum. * Note that block checksums are not supported by the lz4 * command line utility + * @param withBlockDependency whether a block may depend on + * the content of a previous block. Enabling this may improve + * compression ratio but makes it impossible to decompress the + * output in parallel. */ - public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum) { + public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum, + boolean withBlockDependency) { this.blockSize = blockSize; this.withContentChecksum = withContentChecksum; this.withBlockChecksum = withBlockChecksum; + this.withBlockDependency = withBlockDependency; } @Override public String toString() { return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum - + ", withBlockChecksum " + withBlockChecksum; + + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency; } } @@ -152,6 +157,9 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { blockHash = params.withBlockChecksum ? new XXHash32() : null; out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE); writeFrameDescriptor(); + blockDependencyBuffer = params.withBlockDependency + ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] + : null; } @Override @@ -199,8 +207,10 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { } private void writeFrameDescriptor() throws IOException { - int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION - | FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK; + int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION; + if (!params.withBlockDependency) { + flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK; + } if (params.withContentChecksum) { flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK; } @@ -217,10 +227,18 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { } private void flushBlock() throws IOException { + final boolean withBlockDependency = params.withBlockDependency; ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (OutputStream o = new BlockLZ4CompressorOutputStream(baos)) { + try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos)) { + if (withBlockDependency) { + o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, + collectedBlockDependencyBytes); + } o.write(blockData, 0, currentIndex); } + if (withBlockDependency) { + appendToBlockDependencyBuffer(blockData, 0, currentIndex); + } byte[] b = baos.toByteArray(); if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, @@ -250,5 +268,20 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { } } + private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { + len = Math.min(len, blockDependencyBuffer.length); + if (len > 0) { + int keep = blockDependencyBuffer.length - len; + if (keep > 0) { + // move last keep bytes towards the start of the buffer + System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); + } + // append new data + System.arraycopy(b, off, blockDependencyBuffer, keep, len); + collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, + blockDependencyBuffer.length); + } + } + } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a5720927/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java index fe37a28..d54fafe 100644 --- a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java @@ -44,9 +44,14 @@ public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase { new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M1) }, new Object[] { FramedLZ4CompressorOutputStream.Parameters.DEFAULT }, // default without content checksum - new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, false, false) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, + false, false, false) }, // default with block checksum - new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, true, true) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M4, + true, true, false) }, + // small blocksize (so we get enough blocks) and enabled block dependency, otherwise defaults + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64, + true, false, true) }, }); }