Repository: commons-compress Updated Branches: refs/heads/master 099cdc213 -> a0aec901f
COMPRESS-425 add auto-detection of Zstandard inputs Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a0aec901 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a0aec901 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a0aec901 Branch: refs/heads/master Commit: a0aec901f411b4bc26734745297b7651d7e6b9a2 Parents: 099cdc2 Author: Stefan Bodewig <bode...@apache.org> Authored: Sun Oct 22 07:06:19 2017 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Sun Oct 22 07:06:19 2017 +0200 ---------------------------------------------------------------------- src/changes/changes.xml | 3 ++ .../compressors/CompressorStreamFactory.java | 4 ++ .../compressors/zstandard/ZstdUtils.java | 52 ++++++++++++++++++++ src/site/xdoc/examples.xml | 2 +- src/site/xdoc/limitations.xml | 2 - .../compressors/DetectCompressorTestCase.java | 6 +++ .../compressors/zstandard/ZstdUtilsTest.java | 52 ++++++++++++++++++++ 7 files changed, 118 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 8854b6c..2a94f16 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -49,6 +49,9 @@ The <action> type attribute can be add,update,fix,remove. Add read-only support for Zstandard compression based on the Zstd-jni project. </action> + <action issue="COMPRESS-425" type="add" date="2017-10-22"> + Added auto-detection for Zstandard compressed streams. 
+ </action> </release> <release version="1.15" date="2017-10-17" description="Release 1.15 http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java index ea32223..ab9d5fd 100644 --- a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java +++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java @@ -493,6 +493,10 @@ public class CompressorStreamFactory implements CompressorStreamProvider { return LZ4_FRAMED; } + if (ZstdUtils.matches(signature, signatureLength)) { + return ZSTANDARD; + } + throw new CompressorException("No Compressor found for the stream signature."); } /** http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java index a12492e..8588519 100644 --- a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java @@ -29,6 +29,20 @@ public class ZstdUtils { DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE } + /** + * Zstandard Frame Magic Bytes. + */ + private static final byte[] ZSTANDARD_FRAME_MAGIC = { + (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD + }; + + /** + * Skippable Frame Magic Bytes - the three common bytes. 
+ */ + private static final byte[] SKIPPABLE_FRAME_MAGIC = { + (byte) 0x2A, (byte) 0x4D, (byte) 0x18 + }; + private static volatile CachedAvailability cachedZstdAvailability; static { @@ -81,6 +95,44 @@ public class ZstdUtils { } } + /** + * Checks if the signature matches what is expected for a Zstandard file. + * + * @param signature the bytes to check + * @param length the number of bytes to check + * @return true if signature matches the Zstandard or skippable + * frame magic bytes, false otherwise + */ + public static boolean matches(final byte[] signature, final int length) { + if (length < ZSTANDARD_FRAME_MAGIC.length) { + return false; + } + + boolean isZstandard = true; + for (int i = 0; i < ZSTANDARD_FRAME_MAGIC.length; ++i) { + if (signature[i] != ZSTANDARD_FRAME_MAGIC[i]) { + isZstandard = false; + break; + } + } + if (isZstandard) { + return true; + } + + if (0x50 == (signature[0] & 0xF0)) { + // skippable frame + for (int i = 0; i < SKIPPABLE_FRAME_MAGIC.length; ++i) { + if (signature[i + 1] != SKIPPABLE_FRAME_MAGIC[i]) { + return false; + } + } + + return true; + } + + return false; + } + // only exists to support unit tests static CachedAvailability getCachedZstdAvailability() { return cachedZstdAvailability; http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/site/xdoc/examples.xml ---------------------------------------------------------------------- diff --git a/src/site/xdoc/examples.xml b/src/site/xdoc/examples.xml index 6bac23a..cced8a8 100644 --- a/src/site/xdoc/examples.xml +++ b/src/site/xdoc/examples.xml @@ -82,7 +82,7 @@ CompressorInputStream input = new CompressorStreamFactory() .createCompressorInputStream(originalInput); ]]></source> - <p>Note that there is no way to detect the lzma, Zstandard or Brotli formats so only + <p>Note that there is no way to detect the lzma or Brotli formats so only the two-arg version of <code>createCompressorInputStream</code> can be used. 
Prior to Compress 1.9 the .Z format hasn't been auto-detected http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/site/xdoc/limitations.xml ---------------------------------------------------------------------- diff --git a/src/site/xdoc/limitations.xml b/src/site/xdoc/limitations.xml index 18b5b66..d651204 100644 --- a/src/site/xdoc/limitations.xml +++ b/src/site/xdoc/limitations.xml @@ -200,8 +200,6 @@ href="https://github.com/luben/zstd-jni">Zstandard JNI</a> library.</li> <li>read-only support</li> - <li><code>CompressorStreamFactory</code> is not able to auto-detect - streams using Zstandard compression.</li> </ul> </section> </body> http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java index 6fde36d..7f3d316 100644 --- a/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java +++ b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java @@ -38,6 +38,7 @@ import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStr import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.junit.Test; @SuppressWarnings("deprecation") // deliberately tests setDecompressConcatenated @@ -113,6 +114,10 @@ public final class DetectCompressorTestCase { assertNotNull(zlib); assertTrue(zlib instanceof DeflateCompressorInputStream); + final CompressorInputStream zstd = 
getStreamFor("bla.tar.zst"); + assertNotNull(zstd); + assertTrue(zstd instanceof ZstdCompressorInputStream); + try { factory.createCompressorInputStream(new ByteArrayInputStream(new byte[0])); fail("No exception thrown for an empty input stream"); @@ -133,6 +138,7 @@ public final class DetectCompressorTestCase { assertEquals(CompressorStreamFactory.LZMA, detect("bla.tar.lzma")); assertEquals(CompressorStreamFactory.SNAPPY_FRAMED, detect("bla.tar.sz")); assertEquals(CompressorStreamFactory.Z, detect("bla.tar.Z")); + assertEquals(CompressorStreamFactory.ZSTANDARD, detect("bla.tar.zst")); //make sure we don't oom on detect assertEquals(CompressorStreamFactory.Z, detect("COMPRESS-386")); http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java new file mode 100644 index 0000000..5bba0ad --- /dev/null +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.compress.compressors.zstandard; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class ZstdUtilsTest { + + @Test + public void testMatchesZstandardFrame() { + final byte[] data = { + (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD, + }; + assertFalse(ZstdUtils.matches(data, 3)); + assertTrue(ZstdUtils.matches(data, 4)); + assertTrue(ZstdUtils.matches(data, 5)); + data[3] = '0'; + assertFalse(ZstdUtils.matches(data, 4)); + } + + @Test + public void testMatchesSkippableFrame() { + final byte[] data = { + 0, (byte) 0x2A, (byte) 0x4D, (byte) 0x18, + }; + assertFalse(ZstdUtils.matches(data, 4)); + for (byte b = (byte) 0x50; b < 0x60; b++) { + data[0] = b; + assertTrue(ZstdUtils.matches(data, 4)); + } + assertFalse(ZstdUtils.matches(data, 3)); + assertTrue(ZstdUtils.matches(data, 5)); + } +}