This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push: new 770ea82a [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment. 770ea82a is described below commit 770ea82a132282fb0edb186fe6db646a1a0b7a35 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Sat Jan 21 09:23:35 2023 -0500 [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment. If the strings contain non-ISO_8859_1 characters, unknown characters are displayed after decompression. Use percent encoding for non-ISO_8859_1 characters. --- src/changes/changes.xml | 5 ++++ .../gzip/GzipCompressorOutputStream.java | 29 +++++++++++++++++++--- .../gzip/GzipCompressorOutputStreamTest.java | 14 +++++------ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 0ccf35aa..2f0341a0 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -52,6 +52,11 @@ The <action> type attribute can be add,update,fix,remove. <action type="fix" dev="ggregory" due-to="Arturo Bernal">Remove duplicate conditions. Use switch instead. #298.</action> <action type="fix" dev="ggregory" due-to="Robin Schimpf">Replace JUnit 3 and 4 with JUnit 5 #344, #346.</action> <action type="fix" dev="ggregory" due-to="Glavo">Make 'ZipFile.offsetComparator' static #353.</action> + <action type="fix" issue="COMPRESS-638" dev="ggregory" due-to="Radar wen, Gary Gregory, Michael Osipov"> + The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment. + If the strings contains non-ISO_8859_1 characters, unknown characters are displayed after decompression. + Use percent encoding for non ISO_8859_1 characters. 
+ </action> <!-- ADD --> <action type="add" issue="COMPRESS-614" dev="ggregory" due-to="Andre Brait, Gary Gregory">Use FileTime for time fields in SevenZipArchiveEntry #256.</action> <action type="add" issue="COMPRESS-621" dev="ggregory" due-to="Glavo">Fix calculation the offset of the first zip central directory entry #334.</action> diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java index e9d259dd..7b1975ae 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java @@ -20,8 +20,11 @@ package org.apache.commons.compress.compressors.gzip; import java.io.IOException; import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; import java.util.zip.CRC32; import java.util.zip.Deflater; import java.util.zip.GZIPInputStream; @@ -132,6 +135,27 @@ public class GzipCompressorOutputStream extends CompressorOutputStream { out.flush(); } + /** + * Gets the bytes encoded in the {@value GzipUtils#GZIP_ENCODING} Charset. + * <p> + * If the string cannot be encoded directly with {@value GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding. + * </p> + * + * @param string The string to encode. 
+ * @return + * @throws IOException + */ + private byte[] getBytes(final String string) throws IOException { + if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) { + return string.getBytes(GzipUtils.GZIP_ENCODING); + } + try { + return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII); + } catch (final URISyntaxException e) { + throw new IOException(string, e); + } + } + /** * {@inheritDoc} * @@ -151,7 +175,6 @@ public class GzipCompressorOutputStream extends CompressorOutputStream { public void write(final byte[] buffer, final int offset, final int length) throws IOException { if (deflater.finished()) { throw new IOException("Cannot write more data, the end of the compressed data stream has been reached"); - } if (length > 0) { deflater.setInput(buffer, offset, length); @@ -195,12 +218,12 @@ public class GzipCompressorOutputStream extends CompressorOutputStream { out.write(buffer.array()); if (filename != null) { - out.write(filename.getBytes(GzipUtils.GZIP_ENCODING)); + out.write(getBytes(filename)); out.write(0); } if (comment != null) { - out.write(comment.getBytes(GzipUtils.GZIP_ENCODING)); + out.write(getBytes(comment)); out.write(0); } } diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java index 70bd8833..aa6922d8 100644 --- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java @@ -27,7 +27,6 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; /** @@ -35,7 +34,7 @@ import org.junit.jupiter.api.Test; */ public class GzipCompressorOutputStreamTest { - private void testFileName(final String sourceFile) 
throws IOException { + private void testFileName(final String expected, final String sourceFile) throws IOException { final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile); Files.write(tempSourceFile, "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1)); final Path targetFile = Files.createTempFile("test", ".gz"); @@ -45,25 +44,26 @@ public class GzipCompressorOutputStreamTest { Files.copy(tempSourceFile, gos); } try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) { - assertEquals(sourceFile, gis.getMetaData().getFilename()); + assertEquals(expected, gis.getMetaData().getFilename()); } } @Test public void testFileNameAscii() throws IOException { - testFileName("ASCII.xml"); + testFileName("ASCII.xml", "ASCII.xml"); } /** * Tests COMPRESS-638. * + * GZip RFC requires ISO 8859-1 (LATIN-1). + * * @throws IOException When the test fails. */ @Test - @Disabled("COMPRESS-638") - public void testFileNameChinese() throws IOException { + public void testFileNameChinesePercentEncoded() throws IOException { // "Test Chinese name" - testFileName("\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml"); + testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml"); } }