This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d2dbdd8b (doc) Added support for gzip extra subfields. (#604)
3d2dbdd8b is described below

commit 3d2dbdd8b76acdb775e103353ebfc18022dc903f
Author: ddeschenes-1 <54910444+ddeschene...@users.noreply.github.com>
AuthorDate: Sun Nov 17 09:34:29 2024 -0500

    (doc) Added support for gzip extra subfields. (#604)
    
    Co-authored-by: Danny Deschenes <ddesche...@towerlogic.com>
---
 .../commons/compress/compressors/gzip/Extra.java   | 227 +++++++++++++++++++++
 .../gzip/GzipCompressorInputStream.java            |  12 +-
 .../gzip/GzipCompressorOutputStream.java           |  11 +-
 .../compress/compressors/gzip/GzipParameters.java  |  24 +++
 .../gzip/GzipCompressorOutputStreamTest.java       |  70 +++++++
 5 files changed, 335 insertions(+), 9 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java b/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java
new file mode 100644
index 000000000..213c1f14c
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.compressors.gzip;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Container for subfields.
+ *
+ * This class does not expose the internal subfields list to prevent adding
+ * subfields without total extra length validation. However a copy of the list
+ * is available.
+ *
+ * @since 1.28.0
+ */
+public class Extra {
+
+    static final int MAX_SIZE = 0xFFFF;
+    static final byte[] ZERO_BYTES = new byte[0];
+
+    static Extra fromBytes(byte[] ba) throws IOException {
+        if (ba == null) {
+            return null;
+        }
+
+        Extra e = new Extra();
+
+        int pos = 0;
+        while (pos <= (ba.length - 4)) {
+            SubField f = new SubField();
+            f.si1 = ba[pos++];
+            f.si2 = ba[pos++];
+
+            int sublen = (ba[pos++] & 0xff) | ((ba[pos++] & 0xff) << 8);
+            if (sublen > (ba.length - pos)) {
+                throw new IOException("Extra subfield lenght exceeds remaining bytes in extra: " + sublen + " > " + (ba.length - pos));
+            }
+
+            f.payload = new byte[sublen];
+            System.arraycopy(ba, pos, f.payload, 0, sublen);
+            pos += sublen;
+
+            e.fieldsList.add(f);
+            e.totalSize = pos;
+        }
+
+        if (pos < ba.length) {
+            throw new IOException("" + (ba.length - pos) + " remaining bytes not used to parse an extra subfield.");
+        }
+
+        return e;
+    }
+
+    // --------------
+
+    private final List<SubField> fieldsList = new ArrayList<>();
+    private int totalSize = 0;
+
+    public Extra() {
+    }
+
+    public boolean isEmpty() {
+        return fieldsList.isEmpty();
+    }
+
+    public void clear() {
+        fieldsList.clear();
+        totalSize = 0;
+    }
+
+    /**
+     * The bytes count of this extra payload when encoded. This does not include its
+     * own 16 bits size. For N sub fields, the total is all subfields payloads + 4N.
+     */
+    public int getEncodedSize() {
+        return totalSize;
+    }
+
+    /**
+     * The count of subfields contained in this extra.
+     */
+    public int getSize() {
+        return fieldsList.size();
+    }
+
+    /**
+     * @return an unmodifiable copy of the subfields list.
+     */
+    public List<SubField> getFieldsList() {
+        return Collections.unmodifiableList(fieldsList);
+    }
+
+    /**
+     * Append a subfield by a 2-chars ISO-8859-1 string. The char at index 0 and 1
+     * are respectiovely si1 and si2 (subfield id 1 and 2).
+     *
+     * @throws IllegalArgumentException if the subfield is not 2-chars or the
+     *                                  payload is null
+     *
+     * @throws IOException              if appending this subfield would exceed the
+     *                                  max size 65535 of the extra header.
+     */
+    public Extra appendSubField(String subfieldId, byte[] payload) throws IOException {
+        if (subfieldId.length() != 2) {
+            throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string.");
+        }
+        if (payload == null) {
+            throw new IllegalArgumentException("payload was null");
+        }
+
+        char si1 = subfieldId.charAt(0);
+        char si2 = subfieldId.charAt(1);
+        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
+            throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string.");
+        }
+
+        SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
+        int len = 4 + payload.length;
+        if (totalSize + len > MAX_SIZE) {
+            throw new IOException("extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
+        }
+
+        fieldsList.add(f);
+        totalSize += len;
+
+        return this;
+    }
+
+    byte[] toBytes() {
+        if (fieldsList.isEmpty()) {
+            return ZERO_BYTES;
+        }
+
+        byte[] ba = new byte[totalSize];
+
+        int pos = 0;
+        for (SubField f : fieldsList) {
+            ba[pos++] = f.si1;
+            ba[pos++] = f.si2;
+            ba[pos++] = (byte) (f.payload.length & 0xff); // little endian expected
+            ba[pos++] = (byte) (f.payload.length >>> 8);
+            System.arraycopy(f.payload, 0, ba, pos, f.payload.length);
+            pos += f.payload.length;
+        }
+        return ba;
+    }
+
+    /**
+     * Give all 2-chars ISO-8859-1 strings denoting the subfields. Note that this is
+     * imprecise as ids can repeat. Use the methods with indexes to find a specific
+     * occurence.
+     */
+    public List<String> listIds() {
+        return fieldsList.stream().map(SubField::getId).collect(Collectors.toList());
+    }
+
+    /**
+     * Find the 1st subfield that matches the id.
+     *
+     * @return the SubField if found, null otherwise.
+     */
+    public SubField findFirstSubField(String subfieldId) {
+        return fieldsList.stream().filter(f -> f.getId().equals(subfieldId)).findFirst().orElse(null);
+    }
+
+    /**
+     * Find the subfield at the given index.
+     */
+    public SubField subFieldAt(int i) {
+        return fieldsList.get(i);
+    }
+
+    /**
+     * The carrier for a subfield in the gzip extra.
+     */
+    public static class SubField {
+        byte si1;
+        byte si2;
+        byte[] payload;
+
+        SubField() {
+        }
+
+        SubField(byte si1, byte si2, byte[] payload) {
+            this.si1 = si1;
+            this.si2 = si2;
+            this.payload = payload;
+        }
+
+        /**
+         * The 2 char iso-8859-1 string made from the si1 and si2 bytes of the sub field
+         * id.
+         */
+        public String getId() {
+            return "" + ((char) (si1 & 0xff)) + ((char) (si2 & 0xff));
+        }
+
+        /**
+         * The subfield payload.
+         */
+        public byte[] getPayload() {
+            return payload;
+        }
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
index bacc9fa7f..9a5885aef 100644
--- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
@@ -244,17 +244,13 @@ public class GzipCompressorInputStream extends CompressorInputStream implements
         }
         parameters.setOperatingSystem(inData.readUnsignedByte());
 
-        // Extra field, ignored
+        // Extra field
         if ((flg & FEXTRA) != 0) {
             int xlen = inData.readUnsignedByte();
             xlen |= inData.readUnsignedByte() << 8;
-
-            // This isn't as efficient as calling in.skip would be,
-            // but it's lazier to handle unexpected end of input this way.
-            // Most files don't have an extra field anyway.
-            while (xlen-- > 0) {
-                inData.readUnsignedByte();
-            }
+            byte[] extra = new byte[xlen];
+            inData.readFully(extra);
+            parameters.setExtra(Extra.fromBytes(extra));
         }
 
         // Original file name
diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
index d3123a967..2d84dd477 100644
--- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
@@ -38,6 +38,9 @@ import org.apache.commons.compress.compressors.CompressorOutputStream;
  */
 public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStream> {
 
+    /** Header flag indicating an EXTRA subfields collection follows the header */
+    private static final int FEXTRA = 1 << 2;
+
     /** Header flag indicating a file name follows the header */
     private static final int FNAME = 1 << 3;
 
@@ -170,11 +173,12 @@ public class GzipCompressorOutputStream extends CompressorOutputStr
     private void writeHeader(final GzipParameters parameters) throws IOException {
         final String fileName = parameters.getFileName();
         final String comment = parameters.getComment();
+        final byte[] extra = parameters.getExtra() != null ? parameters.getExtra().toBytes() : null;
         final ByteBuffer buffer = ByteBuffer.allocate(10);
         buffer.order(ByteOrder.LITTLE_ENDIAN);
         buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
         buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
-        buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
+        buffer.put((byte) ((extra != null ? FEXTRA : 0) | (fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
         buffer.putInt((int) (parameters.getModificationTime() / 1000));
         // extra flags
         final int compressionLevel = parameters.getCompressionLevel();
@@ -187,6 +191,11 @@ public class GzipCompressorOutputStream extends CompressorOutputStr
         }
         buffer.put((byte) parameters.getOperatingSystem());
         out.write(buffer.array());
+        if (extra != null) {
+            out.write(extra.length & 0xff); // little endian
+            out.write((extra.length >>> 8) & 0xff);
+            out.write(extra);
+        }
         write(fileName, parameters.getFileNameCharset());
         write(comment, parameters.getFileNameCharset());
     }
diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
index 77a80da45..d7cad8665 100644
--- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
@@ -26,6 +26,7 @@ import java.util.zip.Deflater;
 
 import org.apache.commons.io.Charsets;
 
+
 /**
  * Parameters for the GZIP compressor.
  *
@@ -289,6 +290,7 @@ public class GzipParameters {
      * </p>
      */
     private Instant modificationTime = Instant.EPOCH;
+    private Extra extra;
     private String fileName;
     private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
     private String comment;
@@ -341,6 +343,16 @@ public class GzipParameters {
         return deflateStrategy;
     }
 
+    /**
+     * Gets the Extra.
+     *
+     * @return the extra.
+     * @since 1.28.0
+     */
+    public Extra getExtra() {
+        return extra;
+    }
+
     /**
      * Gets the file name.
      *
@@ -468,6 +480,18 @@ public class GzipParameters {
         this.deflateStrategy = deflateStrategy;
     }
 
+    /**
+     * Sets the Extra subfields. Note that a non-null Extra will appear in the gzip
+     * header regardless of the presence of subfields, while a null Extra will not
+     * appear at all.
+     *
+     * @param extra the collections of extra sub fields.
+     * @since 1.28.0
+     */
+    public void setExtra(Extra extra) {
+        this.extra = extra;
+    }
+
     /**
      * Sets the name of the compressed file.
      *
diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
index 8454bd967..349fda86f 100644
--- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.commons.compress.compressors.gzip;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
@@ -28,8 +29,12 @@ import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.Arrays;
 
+import org.apache.commons.compress.compressors.gzip.Extra.SubField;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 /**
  * Tests {@link GzipCompressorOutputStream}.
@@ -118,4 +123,69 @@ public class GzipCompressorOutputStreamTest {
         // "Test Chinese name"
         testFileName("??????.xml", EXPECTED_FILE_NAME);
     }
+
+    /**
+     * Tests the gzip extra header containing subfields.
+     *
+     * @throws IOException When the test fails.
+     */
+    @ParameterizedTest
+    @CsvSource({
+        "1,      , true",
+        "1,     0, false",
+        "1, 65531, false",
+        "1, 65532, true",
+        "2,     0, false",
+        "2, 32764, true",
+        "2, 32763, false"
+    })
+    public void testExtraSubfields(int subfieldQty, Integer payloadSize, boolean shouldFail) throws IOException {
+        final Path tempSourceFile = Files.createTempFile("test_gzip_extra_", ".txt");
+        final Path targetFile = Files.createTempFile("test_gzip_extra_", ".txt.gz");
+
+        Files.write(tempSourceFile, "Hello World!".getBytes(StandardCharsets.ISO_8859_1));
+
+        final GzipParameters parameters = new GzipParameters();
+        Extra extra = new Extra();
+
+        boolean failed = false;
+
+        byte[][] payloads = new byte[subfieldQty][];
+        for (int i = 0; i < subfieldQty; i++) {
+            if (payloadSize != null) {
+                payloads[i] = new byte[payloadSize];
+                Arrays.fill(payloads[i], (byte) ('a' + i));
+            }
+
+            try {
+                extra.appendSubField("z" + i, payloads[i]);
+            } catch (Exception e) {
+                failed = true;
+                break;
+            }
+        }
+
+        assertEquals(shouldFail, failed, "appending subfield " + (shouldFail ? "succes" : "failure") + " was not expected.");
+        if (shouldFail) {
+            return;
+        }
+
+        parameters.setExtra(extra);
+
+        try (OutputStream fos = Files.newOutputStream(targetFile);
+                GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
+            Files.copy(tempSourceFile, gos);
+        }
+
+        try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
+            Extra extra2 = gis.getMetaData().getExtra();
+            for (int i = 0; i < subfieldQty; i++) {
+                SubField sf = extra2.subFieldAt(i);
+                assertEquals("z" + i, sf.getId()); // id was saved/loaded correctly
+                byte[] ba = sf.getPayload();
+                assertArrayEquals("field " + i + " has wrong payload", payloads[i], ba);
+            }
+        }
+    }
+
 }

Reply via email to