This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push: new 3d2dbdd8b (doc) Added support for gzip extra subfields. (#604) 3d2dbdd8b is described below commit 3d2dbdd8b76acdb775e103353ebfc18022dc903f Author: ddeschenes-1 <54910444+ddeschene...@users.noreply.github.com> AuthorDate: Sun Nov 17 09:34:29 2024 -0500 (doc) Added support for gzip extra subfields. (#604) Co-authored-by: Danny Deschenes <ddesche...@towerlogic.com> --- .../commons/compress/compressors/gzip/Extra.java | 227 +++++++++++++++++++++ .../gzip/GzipCompressorInputStream.java | 12 +- .../gzip/GzipCompressorOutputStream.java | 11 +- .../compress/compressors/gzip/GzipParameters.java | 24 +++ .../gzip/GzipCompressorOutputStreamTest.java | 70 +++++++ 5 files changed, 335 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java b/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java new file mode 100644 index 000000000..213c1f14c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.compressors.gzip; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Container for subfields. + * + * This class does not expose the internal subfields list to prevent adding + * subfields without total extra length validation. However a copy of the list + * is available. + * + * @since 1.28.0 + */ +public class Extra { + + static final int MAX_SIZE = 0xFFFF; + static final byte[] ZERO_BYTES = new byte[0]; + + static Extra fromBytes(byte[] ba) throws IOException { + if (ba == null) { + return null; + } + + Extra e = new Extra(); + + int pos = 0; + while (pos <= (ba.length - 4)) { + SubField f = new SubField(); + f.si1 = ba[pos++]; + f.si2 = ba[pos++]; + + int sublen = (ba[pos++] & 0xff) | ((ba[pos++] & 0xff) << 8); + if (sublen > (ba.length - pos)) { + throw new IOException("Extra subfield lenght exceeds remaining bytes in extra: " + sublen + " > " + (ba.length - pos)); + } + + f.payload = new byte[sublen]; + System.arraycopy(ba, pos, f.payload, 0, sublen); + pos += sublen; + + e.fieldsList.add(f); + e.totalSize = pos; + } + + if (pos < ba.length) { + throw new IOException("" + (ba.length - pos) + " remaining bytes not used to parse an extra subfield."); + } + + return e; + } + + // -------------- + + private final List<SubField> fieldsList = new ArrayList<>(); + private int totalSize = 0; + + public Extra() { + } + + public boolean isEmpty() { + return fieldsList.isEmpty(); + } + + public void clear() { + fieldsList.clear(); + totalSize = 0; + } + + /** + * The bytes count of this extra payload when encoded. This does not include its + * own 16 bits size. For N sub fields, the total is all subfields payloads + 4N. + */ + public int getEncodedSize() { + return totalSize; + } + + /** + * The count of subfields contained in this extra. 
+ */ + public int getSize() { + return fieldsList.size(); + } + + /** + * @return an unmodifiable copy of the subfields list. + */ + public List<SubField> getFieldsList() { + return Collections.unmodifiableList(fieldsList); + } + + /** + * Append a subfield by a 2-chars ISO-8859-1 string. The char at index 0 and 1 + * are respectively si1 and si2 (subfield id 1 and 2). + * + * @throws IllegalArgumentException if the subfield is not 2-chars or the + * payload is null + * + * @throws IOException if appending this subfield would exceed the + * max size 65535 of the extra header. + */ + public Extra appendSubField(String subfieldId, byte[] payload) throws IOException { + if (subfieldId.length() != 2) { + throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string."); + } + if (payload == null) { + throw new IllegalArgumentException("payload was null"); + } + + char si1 = subfieldId.charAt(0); + char si2 = subfieldId.charAt(1); + if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) { + throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string."); + } + + SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload); + int len = 4 + payload.length; + if (totalSize + len > MAX_SIZE) { + throw new IOException("extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")"); + } + + fieldsList.add(f); + totalSize += len; + + return this; + } + + byte[] toBytes() { + if (fieldsList.isEmpty()) { + return ZERO_BYTES; + } + + byte[] ba = new byte[totalSize]; + + int pos = 0; + for (SubField f : fieldsList) { + ba[pos++] = f.si1; + ba[pos++] = f.si2; + ba[pos++] = (byte) (f.payload.length & 0xff); // little endian expected + ba[pos++] = (byte) (f.payload.length >>> 8); + System.arraycopy(f.payload, 0, ba, pos, f.payload.length); + pos += f.payload.length; + } + return ba; + } + + /** + * Give all 2-chars ISO-8859-1 strings denoting the subfields. 
Note that this is + * imprecise as ids can repeat. Use the methods with indexes to find a specific + * occurrence. + */ + public List<String> listIds() { + return fieldsList.stream().map(SubField::getId).collect(Collectors.toList()); + } + + /** + * Find the 1st subfield that matches the id. + * + * @return the SubField if found, null otherwise. + */ + public SubField findFirstSubField(String subfieldId) { + return fieldsList.stream().filter(f -> f.getId().equals(subfieldId)).findFirst().orElse(null); + } + + /** + * Find the subfield at the given index. + */ + public SubField subFieldAt(int i) { + return fieldsList.get(i); + } + + /** + * The carrier for a subfield in the gzip extra. + */ + public static class SubField { + byte si1; + byte si2; + byte[] payload; + + SubField() { + } + + SubField(byte si1, byte si2, byte[] payload) { + this.si1 = si1; + this.si2 = si2; + this.payload = payload; + } + + /** + * The 2 char iso-8859-1 string made from the si1 and si2 bytes of the sub field + * id. + */ + public String getId() { + return "" + ((char) (si1 & 0xff)) + ((char) (si2 & 0xff)); + } + + /** + * The subfield payload. 
+ */ + public byte[] getPayload() { + return payload; + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java index bacc9fa7f..9a5885aef 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java @@ -244,17 +244,13 @@ public class GzipCompressorInputStream extends CompressorInputStream implements } parameters.setOperatingSystem(inData.readUnsignedByte()); - // Extra field, ignored + // Extra field if ((flg & FEXTRA) != 0) { int xlen = inData.readUnsignedByte(); xlen |= inData.readUnsignedByte() << 8; - - // This isn't as efficient as calling in.skip would be, - // but it's lazier to handle unexpected end of input this way. - // Most files don't have an extra field anyway. - while (xlen-- > 0) { - inData.readUnsignedByte(); - } + byte[] extra = new byte[xlen]; + inData.readFully(extra); + parameters.setExtra(Extra.fromBytes(extra)); } // Original file name diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java index d3123a967..2d84dd477 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java @@ -38,6 +38,9 @@ import org.apache.commons.compress.compressors.CompressorOutputStream; */ public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStream> { + /** Header flag indicating an EXTRA subfields collection follows the header */ + private static final int FEXTRA = 1 << 2; + /** Header flag indicating a file name follows the header */ private static final int FNAME = 1 << 3; @@ 
-170,11 +173,12 @@ public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStr private void writeHeader(final GzipParameters parameters) throws IOException { final String fileName = parameters.getFileName(); final String comment = parameters.getComment(); + final byte[] extra = parameters.getExtra() != null ? parameters.getExtra().toBytes() : null; final ByteBuffer buffer = ByteBuffer.allocate(10); buffer.order(ByteOrder.LITTLE_ENDIAN); buffer.putShort((short) GZIPInputStream.GZIP_MAGIC); buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate) - buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags + buffer.put((byte) ((extra != null ? FEXTRA : 0) | (fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags buffer.putInt((int) (parameters.getModificationTime() / 1000)); // extra flags final int compressionLevel = parameters.getCompressionLevel(); @@ -187,6 +191,11 @@ public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStr } buffer.put((byte) parameters.getOperatingSystem()); out.write(buffer.array()); + if (extra != null) { + out.write(extra.length & 0xff); // little endian + out.write((extra.length >>> 8) & 0xff); + out.write(extra); + } write(fileName, parameters.getFileNameCharset()); write(comment, parameters.getFileNameCharset()); } diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java index 77a80da45..d7cad8665 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java @@ -26,6 +26,7 @@ import java.util.zip.Deflater; import org.apache.commons.io.Charsets; + /** * Parameters for the GZIP compressor. 
* @@ -289,6 +290,7 @@ public class GzipParameters { * </p> */ private Instant modificationTime = Instant.EPOCH; + private Extra extra; private String fileName; private Charset fileNameCharset = GzipUtils.GZIP_ENCODING; private String comment; @@ -341,6 +343,16 @@ public class GzipParameters { return deflateStrategy; } + /** + * Gets the Extra. + * + * @return the extra. + * @since 1.28.0 + */ + public Extra getExtra() { + return extra; + } + /** * Gets the file name. * @@ -468,6 +480,18 @@ public class GzipParameters { this.deflateStrategy = deflateStrategy; } + /** + * Sets the Extra subfields. Note that a non-null Extra will appear in the gzip + * header regardless of the presence of subfields, while a null Extra will not + * appear at all. + * + * @param extra the collections of extra sub fields. + * @since 1.28.0 + */ + public void setExtra(Extra extra) { + this.extra = extra; + } + /** * Sets the name of the compressed file. * diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java index 8454bd967..349fda86f 100644 --- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java @@ -19,6 +19,7 @@ package org.apache.commons.compress.compressors.gzip; +import static org.junit.Assert.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -28,8 +29,12 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; +import org.apache.commons.compress.compressors.gzip.Extra.SubField; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import 
org.junit.jupiter.params.provider.CsvSource; /** * Tests {@link GzipCompressorOutputStream}. @@ -118,4 +123,69 @@ public class GzipCompressorOutputStreamTest { // "Test Chinese name" testFileName("??????.xml", EXPECTED_FILE_NAME); } + + /** + * Tests the gzip extra header containing subfields. + * + * @throws IOException When the test fails. + */ + @ParameterizedTest + @CsvSource({ + "1, , true", + "1, 0, false", + "1, 65531, false", + "1, 65532, true", + "2, 0, false", + "2, 32764, true", + "2, 32763, false" + }) + public void testExtraSubfields(int subfieldQty, Integer payloadSize, boolean shouldFail) throws IOException { + final Path tempSourceFile = Files.createTempFile("test_gzip_extra_", ".txt"); + final Path targetFile = Files.createTempFile("test_gzip_extra_", ".txt.gz"); + + Files.write(tempSourceFile, "Hello World!".getBytes(StandardCharsets.ISO_8859_1)); + + final GzipParameters parameters = new GzipParameters(); + Extra extra = new Extra(); + + boolean failed = false; + + byte[][] payloads = new byte[subfieldQty][]; + for (int i = 0; i < subfieldQty; i++) { + if (payloadSize != null) { + payloads[i] = new byte[payloadSize]; + Arrays.fill(payloads[i], (byte) ('a' + i)); + } + + try { + extra.appendSubField("z" + i, payloads[i]); + } catch (Exception e) { + failed = true; + break; + } + } + + assertEquals(shouldFail, failed, "appending subfield " + (shouldFail ? 
"succes" : "failure") + " was not expected."); + if (shouldFail) { + return; + } + + parameters.setExtra(extra); + + try (OutputStream fos = Files.newOutputStream(targetFile); + GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) { + Files.copy(tempSourceFile, gos); + } + + try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) { + Extra extra2 = gis.getMetaData().getExtra(); + for (int i = 0; i < subfieldQty; i++) { + SubField sf = extra2.subFieldAt(i); + assertEquals("z" + i, sf.getId()); // id was saved/loaded correctly + byte[] ba = sf.getPayload(); + assertArrayEquals("field " + i + " has wrong payload", payloads[i], ba); + } + } + } + }