Repository: commons-compress Updated Branches: refs/heads/master 5170392a5 -> 043f42b65
allow LZ77 algorithm to be tuned niceLen and maxCandidates are used in the same way as zlib's deflate algorithm. The configured values roughly correspond to zlib's compression levels 5 (tunedForSpeed), 7 (default) and 9 (tunedForCompressionRatio). Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/043f42b6 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/043f42b6 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/043f42b6 Branch: refs/heads/master Commit: 043f42b65d9c67fd064d2ba638b246389ad3a26d Parents: 5170392 Author: Stefan Bodewig <bode...@apache.org> Authored: Sun Mar 26 17:57:14 2017 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Sun Mar 26 17:57:14 2017 +0200 ---------------------------------------------------------------------- .../compressors/lz77support/LZ77Compressor.java | 6 +- .../compressors/lz77support/Parameters.java | 75 +++++++++++++++++++- .../lz4/BlockLZ4CompressorRoundtripTest.java | 29 +++++++- .../lz77support/LZ77CompressorTest.java | 2 +- .../compressors/snappy/SnappyRoundtripTest.java | 20 ++++++ 5 files changed, 125 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java index 487dee2..c284f4c 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java @@ -479,7 +479,9 @@ public class LZ77Compressor { int longestMatchLength = minLength - 1; final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead); final int minIndex = Math.max(0, currentPosition - params.getMaxOffset()); - while (matchHead >= minIndex) { + final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength()); + final int maxCandidates = params.getMaxCandidates(); + for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) { int currentLength = 0; for (int i = 0; i < maxPossibleLength; i++) { if (window[matchHead + i] != window[currentPosition + i]) { @@ -490,7 +492,7 @@ public class LZ77Compressor { if (currentLength > longestMatchLength) { longestMatchLength = currentLength; matchStart = matchHead; - if (currentLength == maxPossibleLength) { + if (currentLength >= niceBackReferenceLength) { // no need to search any further break; } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java index 3842dd9..e4e9e47 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java @@ -52,6 +52,7 @@ public final class Parameters { public static class Builder { private final int windowSize; private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength; + private Integer niceBackReferenceLength, maxCandidates; private Builder(int windowSize) { if (windowSize < 2 || !isPowerOfTwo(windowSize)) { @@ -150,24 +151,78 @@ public final class Parameters { } /** + * Sets the "nice length" of a back-reference. + * + * <p>When a back-references if this size has been found, stop searching for longer back-references.</p> + * + * <p>This settings can be used to tune the tradeoff between compression speed and compression ratio.</p> + */ + public Builder withNiceBackReferenceLength(int niceLen) { + niceBackReferenceLength = niceLen; + return this; + } + + /** + * Sets the maximum number of back-reference candidates that should be consulted. + * + * <p>This settings can be used to tune the tradeoff between compression speed and compression ratio.</p> + */ + public Builder withMaxNumberOfCandidates(int maxCandidates) { + this.maxCandidates = maxCandidates; + return this; + } + + /** + * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved + * compression speed at the cost of compression ratio. + * + * <p>Use this method after configuring "maximum back-reference length".</p> + */ + public Builder tunedForSpeed() { + niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength / 8); + maxCandidates = Math.max(32, windowSize / 1024); + return this; + } + + /** + * Changes the default setting for "nice back-reference length" and "maximum number of candidates" for improved + * compression ratio at the cost of compression speed. + * + * <p>Use this method after configuring "maximum back-reference length".</p> + */ + public Builder tunedForCompressionRatio() { + niceBackReferenceLength = maxBackReferenceLength; + maxCandidates = Math.max(32, windowSize / 16); + return this; + } + + /** * Creates the {@link Parameters} instance. * @return the configured {@link Parameters} instance. */ public Parameters build() { + // default settings tuned for a compromise of good compression and acceptable speed + int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength + : Math.max(minBackReferenceLength, maxBackReferenceLength / 2); + int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize / 128); + return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength, - maxOffset, maxLiteralLength); + maxOffset, maxLiteralLength, niceLen, candidates); } } - private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength; + private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength, + niceBackReferenceLength, maxCandidates; private Parameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength, int maxOffset, - int maxLiteralLength) { + int maxLiteralLength, int niceBackReferenceLength, int maxCandidates) { this.windowSize = windowSize; this.minBackReferenceLength = minBackReferenceLength; this.maxBackReferenceLength = maxBackReferenceLength; this.maxOffset = maxOffset; this.maxLiteralLength = maxLiteralLength; + this.niceBackReferenceLength = niceBackReferenceLength; + this.maxCandidates = maxCandidates; } /** @@ -207,6 +262,20 @@ public final class Parameters { return maxLiteralLength; } + /** + * Gets the length of a back-reference that is considered nice enough to stop searching for longer ones. + */ + public int getNiceBackReferenceLength() { + return niceBackReferenceLength; + } + + /** + * Gets the maximum number of back-reference candidates to consider. + */ + public int getMaxCandidates() { + return maxCandidates; + } + private static final boolean isPowerOfTwo(int x) { // pre-condition: x > 0 return (x & (x - 1)) == 0; http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java index 780d4ce..da4941d 100644 --- a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java @@ -22,22 +22,48 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.lz77support.Parameters; import org.apache.commons.compress.utils.IOUtils; import org.junit.Assert; import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runner.RunWith; +@RunWith(Parameterized.class) public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase { + @org.junit.runners.Parameterized.Parameters(name = "using {0}") + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + new Object[] { "default", BlockLZ4CompressorOutputStream.createParameterBuilder().build() }, + new Object[] { "tuned for speed", + BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForSpeed().build() }, + new Object[] { "tuned for compression ratio", + BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForCompressionRatio().build() } + }); + } + + private final String config; + private final Parameters params; + + public BlockLZ4CompressorRoundtripTest(String config, Parameters params) { + this.config = config; + this.params = params; + } + private void roundTripTest(String testFile) throws IOException { File input = getFile(testFile); long start = System.currentTimeMillis(); final File outputSz = new File(dir, input.getName() + ".block.lz4"); try (FileInputStream is = new FileInputStream(input); FileOutputStream os = new FileOutputStream(outputSz); - BlockLZ4CompressorOutputStream los = new BlockLZ4CompressorOutputStream(os)) { + BlockLZ4CompressorOutputStream los = new BlockLZ4CompressorOutputStream(os, params)) { IOUtils.copy(is, los); } + System.err.println("Configuration: " + config); System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); start = System.currentTimeMillis(); @@ -62,6 +88,7 @@ public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase { roundTripTest("lorem-ipsum.txt.gz"); } + // yields no compression at all @Test public void biggerFileRoundtrip() throws IOException { roundTripTest("COMPRESS-256.7z"); http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java index 9ac0c14..3163295 100644 --- a/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java @@ -149,7 +149,6 @@ public class LZ77CompressorTest { List<LZ77Compressor.Block> blocks = compress(newParameters(8), BLA); assertSize(6, blocks); assertLiteralBlock("Blah b", blocks.get(0)); - assertEquals(LZ77Compressor.BackReference.class, blocks.get(1).getClass()); assertBackReference(5, 7, blocks.get(1)); assertBackReference(5, 3, blocks.get(2)); assertBackReference(5, 7, blocks.get(3)); @@ -347,6 +346,7 @@ public class LZ77CompressorTest { .withMaxBackReferenceLength(maxBackReferenceLength) .withMaxOffset(maxOffset) .withMaxLiteralLength(maxLiteralLength) + .tunedForCompressionRatio() .build(); } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java index 427c8d0..8b96309 100644 --- a/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java @@ -61,15 +61,35 @@ public final class SnappyRoundtripTest extends AbstractTestCase { // should yield decent compression @Test public void blaTarRoundtrip() throws IOException { + System.err.println("Configuration: default"); roundTripTest("bla.tar"); } + @Test + public void blaTarRoundtripTunedForSpeed() throws IOException { + System.err.println("Configuration: tuned for speed"); + roundTripTest(getFile("bla.tar"), + SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .tunedForSpeed() + .build()); + } + + @Test + public void blaTarRoundtripTunedForCompressionRatio() throws IOException { + System.err.println("Configuration: tuned for compression ratio"); + roundTripTest(getFile("bla.tar"), + SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE) + .tunedForCompressionRatio() + .build()); + } + // yields no compression at all @Test public void gzippedLoremIpsumRoundtrip() throws IOException { roundTripTest("lorem-ipsum.txt.gz"); } + // yields no compression at all @Test public void biggerFileRoundtrip() throws IOException { roundTripTest("COMPRESS-256.7z");