Repository: commons-compress Updated Branches: refs/heads/master 76d913a3c -> 176cd18fb
COMPRESS-271 create parameters class for LZ4 output and test all block sizes ... and fix a bug uncovered by the new tests Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/176cd18f Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/176cd18f Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/176cd18f Branch: refs/heads/master Commit: 176cd18fb69464d036697cf7c83788eb367ae904 Parents: 76d913a Author: Stefan Bodewig <bode...@apache.org> Authored: Wed Jan 25 19:40:58 2017 +0100 Committer: Stefan Bodewig <bode...@apache.org> Committed: Wed Jan 25 19:49:25 2017 +0100 ---------------------------------------------------------------------- .../lz4/BlockLZ4CompressorOutputStream.java | 15 ++++- .../lz4/FramedLZ4CompressorOutputStream.java | 69 ++++++++++++++++---- .../lz4/BlockLZ4CompressorRoundtripTest.java | 28 -------- .../compress/compressors/lz4/FactoryTest.java | 66 +++++++++++++++++++ .../FramedLZ4CompressorOutputStreamTest.java | 32 --------- .../lz4/FramedLZ4CompressorRoundtripTest.java | 57 +++++++--------- 6 files changed, 159 insertions(+), 108 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java index e51b28d..baf58ad 100644 --- a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorOutputStream.java @@ -352,7 +352,6 @@ public class 
BlockLZ4CompressorOutputStream extends CompressorOutputStream { replacement.prependLiteral(expand(toExpand, toExpand)); } Pair splitCandidate = lastPairs.get(0); - int splitLen = splitCandidate.length(); int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand; if (splitCandidate.hasBackReference() && splitCandidate.backReferenceLength() >= MIN_BACK_REFERENCE_LENGTH + stillNeeded) { @@ -360,7 +359,11 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(splitCandidate.backReferenceLength() - stillNeeded)); } else { - replacement.prependLiteral(expand(toExpand + splitLen, splitLen)); + if (splitCandidate.hasBackReference()) { + int brLen = splitCandidate.backReferenceLength(); + replacement.prependLiteral(expand(toExpand + brLen, brLen)); + } + splitCandidate.prependTo(replacement); } pairs.add(replacement); } @@ -439,7 +442,13 @@ public class BlockLZ4CompressorOutputStream extends CompressorOutputStream { private int backReferenceLength() { return brLength; } - Pair splitWithNewBackReferenceLengthOf(int newBackReferenceLength) { + private void prependTo(Pair other) { + Iterator<byte[]> litsBackwards = literals.descendingIterator(); + while (litsBackwards.hasNext()) { + other.prependLiteral(litsBackwards.next()); + } + } + private Pair splitWithNewBackReferenceLengthOf(int newBackReferenceLength) { Pair p = new Pair(); p.literals.addAll(literals); p.brOffset = brOffset; http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java index e0622e1..f50aa57 100644 --- 
a/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStream.java @@ -44,9 +44,6 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { * + block dependence */ - private static final int DEFAULT_BLOCK_SIZE = 4096 * 1024; - private static final List<Integer> BLOCK_SIZES = Arrays.asList(64 * 1024, 256 * 1024, 1024 * 1024, - DEFAULT_BLOCK_SIZE); private static final byte[] END_MARK = new byte[4]; // used in one-arg write method @@ -54,12 +51,63 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { private final byte[] blockData; private final OutputStream out; + private final Parameters params; private boolean finished = false; private int currentIndex = 0; // used for frame header checksum and content checksum, if present private final XXHash32 contentHash = new XXHash32(); + public enum BlockSize { + /** Block size of 64K */ + K64(64 * 1024, 0), + /** Block size of 256K */ + K256(256 * 1024, 1), + /** Block size of 1M */ + M1(1024 * 1024, 2), + /** Block size of 4M */ + M4(4096 * 1024, 3); + + private final int size, index; + private BlockSize(int size, int index) { + this.size = size; + this.index = index; + } + int getSize() { + return size; + } + int getIndex() { + return index; + } + } + + /** + * Parameters of the LZ4 frame format. + */ + public static class Parameters { + private final BlockSize blockSize; + + /** + * The default parameters of 4M block size, enabled content + * checksum, disabled block checksums and independent blocks. + * + * <p>This matches the defaults of the lz4 command line utility.</p> + */ + public static Parameters DEFAULT = new Parameters(BlockSize.M4); + + /** + * Sets up custom parameters for the LZ4 stream. + * @param blockSize the size of a single block. 
+ */ + public Parameters(BlockSize blockSize) { + this.blockSize = blockSize; + } + @Override + public String toString() { + return "LZ4 Parameters with BlockSize " + blockSize; + } + } + /** * Constructs a new output stream that compresses data using the * LZ4 frame format using the default block size of 4MB. @@ -67,22 +115,19 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { * @throws IOException if writing the signature fails */ public FramedLZ4CompressorOutputStream(OutputStream out) throws IOException { - this(out, DEFAULT_BLOCK_SIZE); + this(out, Parameters.DEFAULT); } /** * Constructs a new output stream that compresses data using the * LZ4 frame format using the given block size. * @param out the OutputStream to which to write the compressed data - * @param blockSize block size, one of 64 KB, 256 KB, 1 MB or 4 MB. + * @param params the parameters to use * @throws IOException if writing the signature fails - * @throws IllegalArgumentException if the block size is not supported */ - public FramedLZ4CompressorOutputStream(OutputStream out, int blockSize) throws IOException { - if (!BLOCK_SIZES.contains(blockSize)) { - throw new IllegalArgumentException("Unsupported block size"); - } - blockData = new byte[blockSize]; + public FramedLZ4CompressorOutputStream(OutputStream out, Parameters params) throws IOException { + this.params = params; + blockData = new byte[params.blockSize.getSize()]; this.out = out; out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE); writeFrameDescriptor(); @@ -136,7 +181,7 @@ public class FramedLZ4CompressorOutputStream extends CompressorOutputStream { | FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK; out.write(flags); contentHash.update(flags); - int bd = BLOCK_SIZES.indexOf(blockData.length) << 4; + int bd = params.blockSize.getIndex() << 4; out.write(bd); contentHash.update(bd); out.write((int) ((contentHash.getValue() >> 8) & 0xff)); 
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java index fc28841..780d4ce 100644 --- a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java @@ -21,13 +21,8 @@ package org.apache.commons.compress.compressors.lz4; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.InputStream; import java.io.IOException; -import java.io.OutputStream; -import java.util.Random; import org.apache.commons.compress.AbstractTestCase; -import org.apache.commons.compress.compressors.CompressorStreamFactory; -import org.apache.commons.compress.compressors.lz77support.Parameters; import org.apache.commons.compress.utils.IOUtils; import org.junit.Assert; import org.junit.Test; @@ -72,27 +67,4 @@ public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase { roundTripTest("COMPRESS-256.7z"); } - @Test - public void roundtripViaFactory() throws Exception { - File input = getFile("bla.tar"); - long start = System.currentTimeMillis(); - final File outputSz = new File(dir, input.getName() + ".block.lz4"); - try (FileInputStream is = new FileInputStream(input); - FileOutputStream os = new FileOutputStream(outputSz); - OutputStream los = new CompressorStreamFactory() - .createCompressorOutputStream(CompressorStreamFactory.getLZ4Block(), os)) { - IOUtils.copy(is, los); - } - System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() - + ", compressed bytes: " + outputSz.length() + " 
after " + (System.currentTimeMillis() - start) + "ms"); - start = System.currentTimeMillis(); - try (FileInputStream is = new FileInputStream(input); - InputStream sis = new CompressorStreamFactory() - .createCompressorInputStream(CompressorStreamFactory.LZ4_BLOCK, new FileInputStream(outputSz))) { - byte[] expected = IOUtils.toByteArray(is); - byte[] actual = IOUtils.toByteArray(sis); - Assert.assertArrayEquals(expected, actual); - } - System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); - } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java new file mode 100644 index 0000000..3db6249 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FactoryTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.compressors.lz4; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.AbstractTestCase; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.Assert; +import org.junit.Test; + +public class FactoryTest extends AbstractTestCase { + + @Test + public void frameRoundtripViaFactory() throws Exception { + roundtripViaFactory(CompressorStreamFactory.getLZ4Framed()); + } + + @Test + public void blockRoundtripViaFactory() throws Exception { + roundtripViaFactory(CompressorStreamFactory.getLZ4Block()); + } + + private void roundtripViaFactory(String format) throws Exception { + File input = getFile("bla.tar"); + long start = System.currentTimeMillis(); + final File outputSz = new File(dir, input.getName() + "." + format + ".lz4"); + try (FileInputStream is = new FileInputStream(input); + FileOutputStream os = new FileOutputStream(outputSz); + OutputStream los = new CompressorStreamFactory().createCompressorOutputStream(format, os)) { + IOUtils.copy(is, los); + } + System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() + + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); + start = System.currentTimeMillis(); + try (FileInputStream is = new FileInputStream(input); + InputStream sis = new CompressorStreamFactory() + .createCompressorInputStream(format, new FileInputStream(outputSz))) { + byte[] expected = IOUtils.toByteArray(is); + byte[] actual = IOUtils.toByteArray(sis); + Assert.assertArrayEquals(expected, actual); + } + System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); + } +} 
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStreamTest.java deleted file mode 100644 index 5f2802e..0000000 --- a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorOutputStreamTest.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.commons.compress.compressors.lz4; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.junit.Test; - -public final class FramedLZ4CompressorOutputStreamTest { - - @Test(expected = IllegalArgumentException.class) - public void illegalBlockSize() throws IOException { - new FramedLZ4CompressorOutputStream(new ByteArrayOutputStream(), 32 * 1024); - } -} http://git-wip-us.apache.org/repos/asf/commons-compress/blob/176cd18f/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java index 1a743e7..9744df7 100644 --- a/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/lz4/FramedLZ4CompressorRoundtripTest.java @@ -21,26 +21,44 @@ package org.apache.commons.compress.compressors.lz4; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.InputStream; import java.io.IOException; -import java.io.OutputStream; -import java.util.Random; +import java.util.Arrays; +import java.util.Collection; + import org.apache.commons.compress.AbstractTestCase; -import org.apache.commons.compress.compressors.CompressorStreamFactory; -import org.apache.commons.compress.compressors.lz77support.Parameters; import org.apache.commons.compress.utils.IOUtils; import org.junit.Assert; import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runner.RunWith; +@RunWith(Parameterized.class) public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase { + @Parameters(name = "using {0}") + public static 
Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K64) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.K256) }, + new Object[] { new FramedLZ4CompressorOutputStream.Parameters(FramedLZ4CompressorOutputStream.BlockSize.M1) }, + new Object[] { FramedLZ4CompressorOutputStream.Parameters.DEFAULT }, + }); + } + + private final FramedLZ4CompressorOutputStream.Parameters params; + + public FramedLZ4CompressorRoundtripTest(FramedLZ4CompressorOutputStream.Parameters params) { + this.params = params; + } + private void roundTripTest(String testFile) throws IOException { File input = getFile(testFile); long start = System.currentTimeMillis(); final File outputSz = new File(dir, input.getName() + ".framed.lz4"); try (FileInputStream is = new FileInputStream(input); FileOutputStream os = new FileOutputStream(outputSz); - FramedLZ4CompressorOutputStream los = new FramedLZ4CompressorOutputStream(os)) { + FramedLZ4CompressorOutputStream los = new FramedLZ4CompressorOutputStream(os, params)) { IOUtils.copy(is, los); } System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() @@ -58,11 +76,7 @@ public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase { // should yield decent compression @Test public void blaTarRoundtrip() throws IOException { - try { roundTripTest("bla.tar"); - } catch (Exception e) { - e.printStackTrace(); - } } // yields no compression at all @@ -76,27 +90,4 @@ public final class FramedLZ4CompressorRoundtripTest extends AbstractTestCase { roundTripTest("COMPRESS-256.7z"); } - @Test - public void roundtripViaFactory() throws Exception { - File input = getFile("bla.tar"); - long start = System.currentTimeMillis(); - final File outputSz = new File(dir, input.getName() + ".framed.lz4"); - try (FileInputStream is = new 
FileInputStream(input); - FileOutputStream os = new FileOutputStream(outputSz); - OutputStream los = new CompressorStreamFactory() - .createCompressorOutputStream(CompressorStreamFactory.getLZ4Framed(), os)) { - IOUtils.copy(is, los); - } - System.err.println(input.getName() + " written, uncompressed bytes: " + input.length() - + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis() - start) + "ms"); - start = System.currentTimeMillis(); - try (FileInputStream is = new FileInputStream(input); - InputStream sis = new CompressorStreamFactory() - .createCompressorInputStream(CompressorStreamFactory.LZ4_FRAMED, new FileInputStream(outputSz))) { - byte[] expected = IOUtils.toByteArray(is); - byte[] actual = IOUtils.toByteArray(sis); - Assert.assertArrayEquals(expected, actual); - } - System.err.println(outputSz.getName() + " read after " + (System.currentTimeMillis() - start) + "ms"); - } }