This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit 96385a611d2c9f1a3e36a77076ea278e020e3540 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Tue Feb 4 10:50:39 2025 -0500 Add support for XZ compression in ZIP archives --- src/changes/changes.xml | 1 + .../archivers/zip/ZipArchiveInputStream.java | 9 +- .../archivers/zip/ZipArchiveOutputStream.java | 4 +- .../commons/compress/archivers/zip/ZipFile.java | 4 +- .../commons/compress/archivers/zip/ZipUtil.java | 3 +- .../archivers/zip/ZipCompressMethodXzTest.java | 147 +++++++++++++++++++++ .../archivers/zip/ZipCompressMethodZstdTest.java | 40 +++--- .../apache/commons/compress/zip/test-method-xz.zip | Bin 0 -> 4044 bytes 8 files changed, 181 insertions(+), 27 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 825d67c53..fe609e703 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -85,6 +85,7 @@ The <action> type attribute can be add,update,fix,remove. <action type="add" dev="ggregory" due-to="Gary Gregory">Add SevenZFile.Builder.setMaxMemoryLimitKiB(int).</action> <action type="add" dev="ggregory" due-to="Gary Gregory">Add MemoryLimitException.MemoryLimitException(long, int, Throwable) and deprecate MemoryLimitException.MemoryLimitException(long, int, Exception).</action> <action type="add" issue="COMPRESS-692" dev="ggregory" due-to="Mehmet Karaman, Andrey Loskutov, Gary Gregory">Add support for zstd compression in zip archives.</action> + <action type="add" dev="ggregory" due-to="Gary Gregory">Add support for XZ compression in ZIP archives.</action> <!-- UPDATE --> <action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 72 to 79 #563, #567, #574, #582, #587, #595.</action> <action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-9 #565, #578, #601, #616, #630.</action> diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java 
b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java index 62b41698f..0dd12a3b5 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java @@ -936,7 +936,8 @@ public int read(final byte[] buffer, final int offset, final int length) throws read = readDeflated(buffer, offset, length); } else if (method == ZipMethod.UNSHRINKING.getCode() || method == ZipMethod.IMPLODING.getCode() || method == ZipMethod.ENHANCED_DEFLATED.getCode() || method == ZipMethod.BZIP2.getCode() - || ZipMethod.isZstd(method)) { + || ZipMethod.isZstd(method) + || method == ZipMethod.XZ.getCode()) { read = current.inputStream.read(buffer, offset, length); } else { throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(method), current.entry); @@ -1333,7 +1334,8 @@ private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED - || ZipMethod.isZstd(method); + || ZipMethod.isZstd(method) + || method == ZipMethod.XZ.getCode(); } /** @@ -1346,6 +1348,7 @@ private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { final int method = entry.getMethod(); return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED || method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode() - || ZipMethod.isZstd(method); + || ZipMethod.isZstd(method) + || method == ZipMethod.XZ.getCode(); } } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java index 
09136001a..441067a6f 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -870,7 +870,7 @@ private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer } else if (zipMethod == DEFLATED || out instanceof RandomAccessOutputStream) { System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, ZipConstants.WORD); System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, ZipConstants.WORD); - } else if (ZipMethod.isZstd(zipMethod)) { + } else if (ZipMethod.isZstd(zipMethod) || zipMethod == ZipMethod.XZ.getCode()) { ZipLong.putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); } else { // Stored @@ -1076,7 +1076,7 @@ private boolean handleSizesAndCrc(final long bytesWritten, final long crc, final entry.entry.setSize(entry.bytesRead); entry.entry.setCompressedSize(bytesWritten); entry.entry.setCrc(crc); - } else if (ZipMethod.isZstd(zipMethod)) { + } else if (ZipMethod.isZstd(zipMethod) || zipMethod == ZipMethod.XZ.getCode()) { entry.entry.setCompressedSize(bytesWritten); entry.entry.setCrc(crc); } else if (!(out instanceof RandomAccessOutputStream)) { diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 378e21f46..264f0a850 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -54,6 +54,7 @@ import org.apache.commons.compress.archivers.EntryStreamOffsets; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; import 
org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.compress.utils.BoundedArchiveInputStream; import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; @@ -1234,6 +1235,8 @@ public void close() throws IOException { case ZSTD: case ZSTD_DEPRECATED: return new ZstdCompressorInputStream(is); + case XZ: + return new XZCompressorInputStream(is); case AES_ENCRYPTED: case EXPANDING_LEVEL_1: case EXPANDING_LEVEL_2: @@ -1246,7 +1249,6 @@ public void close() throws IOException { case TOKENIZATION: case UNKNOWN: case WAVPACK: - case XZ: default: throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry); } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java index 643363bb6..d5be5bc4c 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java @@ -335,7 +335,8 @@ private static boolean supportsMethodOf(final ZipArchiveEntry entry) { return method == ZipEntry.STORED || method == ZipMethod.UNSHRINKING.getCode() || method == ZipMethod.IMPLODING.getCode() || method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode() || method == ZipMethod.BZIP2.getCode() - || ZipMethod.isZstd(method); + || ZipMethod.isZstd(method) + || method == ZipMethod.XZ.getCode(); } /** diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodXzTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodXzTest.java new file mode 100644 index 000000000..d464815b0 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodXzTest.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.zip; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class ZipCompressMethodXzTest extends AbstractTest { + + private static final int DEFAULT_LEVEL = 6; + + @TempDir + static Path tempDir; + + /** + * Reads uncompressed data stream and writes it compressed to the output. 
+ * + * @param input the data stream with uncompressed data + * @param output the data stream for compressed output + * @throws IOException the exception thrown by IOUtils.copyLarge() or the XZCompressorOutputStream constructor + */ + private static void compress(final InputStream input, final OutputStream output) throws IOException { + @SuppressWarnings("resource") + final XZCompressorOutputStream outputStream = new XZCompressorOutputStream(output, DEFAULT_LEVEL); + IOUtils.copyLarge(input, outputStream); + outputStream.flush(); + } + + @Test + public void testXzInputStream() throws IOException { + // test-method-xz.zip was created with: + // "\Program Files\7-Zip\7z.exe" a test-method-xz.zip -mm=xz LICENSE.txt + // The "mm" option specifies the compress method + final Path file = getPath("org/apache/commons/compress/zip/test-method-xz.zip"); + try (ZipFile zip = ZipFile.builder().setPath(file).get()) { + final ZipArchiveEntry entry = zip.getEntries().nextElement(); + assertEquals("LICENSE.txt", entry.getName()); + assertTrue(zip.canReadEntryData(entry)); + assertEquals(ZipMethod.XZ.getCode(), entry.getMethod()); + try (InputStream inputStream = zip.getInputStream(entry)) { + final long actualSize = entry.getSize(); + final byte[] buf = new byte[(int) actualSize]; + inputStream.read(buf); + final String text = new String(buf); + assertTrue(text.startsWith(" Apache License"), text); + assertTrue(text.endsWith(" limitations under the License.\n"), text); + assertEquals(11357, text.length()); + } + } + } + + @Test + public void testXzMethodInZipFile() throws IOException { + final String zipContentFile = "testXzMethodInZipFile.txt"; + final byte[] text = "The quick brown fox jumps over the lazy dog".getBytes(StandardCharsets.UTF_8); + final Path file = tempDir.resolve("testXzMethodInZipFile.zip"); + // Create the Zip File + try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) { + final ZipArchiveEntry archiveEntry = new
ZipArchiveEntry(zipContentFile); + archiveEntry.setMethod(ZipMethod.XZ.getCode()); + archiveEntry.setSize(text.length); + zipOutputStream.putArchiveEntry(archiveEntry); + compress(new ByteArrayInputStream(text), zipOutputStream); + zipOutputStream.closeArchiveEntry(); + } + // Read the Zip File + try (ZipFile zipFile = ZipFile.builder().setPath(file).get()) { + // Find the entry + final ZipArchiveEntry entry = zipFile.getEntry(zipContentFile); + // Check the compression method + assertEquals(entry.getMethod(), ZipMethod.XZ.getCode()); + @SuppressWarnings("resource") + final InputStream inputStream = zipFile.getInputStream(entry); + assertTrue(inputStream instanceof XZCompressorInputStream); + final long dataOffset = entry.getDataOffset(); + final int uncompressedSize = (int) entry.getSize(); + assertEquals(text.length, uncompressedSize); + final byte[] uncompressedData = new byte[uncompressedSize]; + inputStream.read(uncompressedData, 0, uncompressedSize); + // Check the uncompressed data + assertEquals(new String(text), new String(uncompressedData)); + try (InputStream fileInputStream = Files.newInputStream(file)) { + fileInputStream.skip(dataOffset); + final byte[] compressedData = new byte[4]; + fileInputStream.read(compressedData); + //assertTrue(ZstdUtils.matches(compressedData, 4)); + } + } + } + + @Test + public void testXzMethodWriteRead() throws IOException { + final String zipContentFile = "testXzMethodWriteRead.txt"; + final byte[] text = "The quick brown fox jumps over the lazy dog".getBytes(StandardCharsets.UTF_8); + final Path file = tempDir.resolve("testXzMethodWriteRead.zip"); + // Create the Zip File + try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) { + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(zipContentFile); + archiveEntry.setMethod(ZipMethod.XZ.getCode()); + archiveEntry.setSize(text.length); + zipOutputStream.putArchiveEntry(archiveEntry); + compress(new ByteArrayInputStream(text), 
zipOutputStream); + zipOutputStream.closeArchiveEntry(); + } + // Read the Zip File + try (ZipFile zipFile = ZipFile.builder().setPath(file).get()) { + // Find the entry + final ZipArchiveEntry entry = zipFile.getEntry(zipContentFile); + // Check the compression method + assertEquals(entry.getMethod(), ZipMethod.XZ.getCode()); + @SuppressWarnings("resource") + final InputStream inputStream = zipFile.getInputStream(entry); + assertTrue(inputStream instanceof XZCompressorInputStream); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodZstdTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodZstdTest.java index 3e4027937..75135caf4 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodZstdTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipCompressMethodZstdTest.java @@ -57,6 +57,26 @@ private static void compress(final InputStream input, final OutputStream output) outputStream.flush(); } + @Test + public void testZstdInputStream() throws IOException { + final Path file = getPath("COMPRESS-692/compress-692.zip"); + try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) { + final ZipArchiveEntry entry = zip.getEntries().nextElement(); + assertEquals("Unexpected first entry", "dolor.txt", entry.getName()); + assertTrue("entry can't be read", zip.canReadEntryData(entry)); + assertEquals("Unexpected method", ZipMethod.ZSTD.getCode(), entry.getMethod()); + try (InputStream inputStream = zip.getInputStream(entry)) { + final long uncompSize = entry.getSize(); + final byte[] buf = new byte[(int) uncompSize]; + inputStream.read(buf); + final String uncompData = new String(buf); + assertTrue(uncompData.startsWith("dolor sit amet")); + assertTrue(uncompData.endsWith("ex ea commodo")); + assertEquals(6066, uncompData.length()); + } + } + } + @ParameterizedTest @EnumSource(names = { "ZSTD", "ZSTD_DEPRECATED" }) public void 
testZstdMethod(final ZipMethod zipMethod) throws IOException { @@ -83,26 +103,6 @@ public void testZstdMethod(final ZipMethod zipMethod) throws IOException { } } - @Test - public void testZstdInputStream() throws IOException { - final Path file = getPath("COMPRESS-692/compress-692.zip"); - try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) { - final ZipArchiveEntry entry = zip.getEntries().nextElement(); - assertEquals("Unexpected first entry", "dolor.txt", entry.getName()); - assertTrue("entry can't be read", zip.canReadEntryData(entry)); - assertEquals("Unexpected method", ZipMethod.ZSTD.getCode(), entry.getMethod()); - try (InputStream inputStream = zip.getInputStream(entry)) { - final long uncompSize = entry.getSize(); - final byte[] buf = new byte[(int) uncompSize]; - inputStream.read(buf); - final String uncompData = new String(buf); - assertTrue(uncompData.startsWith("dolor sit amet")); - assertTrue(uncompData.endsWith("ex ea commodo")); - assertEquals(6066, uncompData.length()); - } - } - } - @ParameterizedTest @EnumSource(names = { "ZSTD", "ZSTD_DEPRECATED" }) public void testZstdMethodInZipFile(final ZipMethod zipMethod) throws IOException { diff --git a/src/test/resources/org/apache/commons/compress/zip/test-method-xz.zip b/src/test/resources/org/apache/commons/compress/zip/test-method-xz.zip new file mode 100644 index 000000000..fc5326105 Binary files /dev/null and b/src/test/resources/org/apache/commons/compress/zip/test-method-xz.zip differ