This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push: new 37047a928 Add and use ZipEncodingHelper.getZipEncoding(Charset) 37047a928 is described below commit 37047a92822ad75f9a5e33d35a7c8c4417b0903b Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Sat Jan 13 16:38:52 2024 -0500 Add and use ZipEncodingHelper.getZipEncoding(Charset) ZipFile now uses a Charset instance variable instead of a String --- .../commons/compress/archivers/tar/TarUtils.java | 2 +- .../compress/archivers/zip/ZipEncodingHelper.java | 42 ++++++++--- .../commons/compress/archivers/zip/ZipFile.java | 81 ++++++++++++---------- .../compress/archivers/zip/ZipEncodingTest.java | 3 +- 4 files changed, 81 insertions(+), 47 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java index 285ed65bb..8ce32b42c 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java @@ -48,7 +48,7 @@ public class TarUtils { private static final int BYTE_MASK = 255; - static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null); + static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset()); /** * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding. diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java index 75c56ba7e..c73476ed8 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java @@ -37,8 +37,25 @@ public abstract class ZipEncodingHelper { static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(CharsetNames.UTF_8); /** - * Instantiates a ZIP encoding. 
An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO encoder - * will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. + * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO + * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. + * <p> + * If the requested character set cannot be found, the platform default will be used instead. + * </p> + * + * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding. + * @return A ZIP encoding for the given encoding name. + * @since 1.26.0 + */ + public static ZipEncoding getZipEncoding(final Charset charset) { + final Charset actual = Charsets.toCharset(charset); + final boolean useReplacement = isUTF8(actual); + return new NioZipEncoding(actual, useReplacement); + } + + /** + * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO + * encoder will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder. * <p> * If the requested character set cannot be found, the platform default will be used instead. * </p> @@ -59,23 +76,30 @@ public abstract class ZipEncodingHelper { static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) { buffer.limit(buffer.position()); buffer.rewind(); - final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment); - on.put(buffer); return on; } + /** + * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
+ * + * @param charset If the given charset is null, then check the platform's default encoding. + */ + static boolean isUTF8(final Charset charset) { + return isUTF8Alias(Charsets.toCharset(charset).name()); + } + + private static boolean isUTF8Alias(final String actual) { + return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual)); + } + /** * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding. * * @param charsetName If the given name is null, then check the platform's default encoding. */ static boolean isUTF8(final String charsetName) { - final String actual = charsetName != null ? charsetName : Charset.defaultCharset().name(); - if (UTF_8.name().equalsIgnoreCase(actual)) { - return true; - } - return UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual)); + return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name()); } } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 2764b5753..d2791b108 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -27,6 +27,7 @@ import java.io.SequenceInputStream; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.SeekableByteChannel; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.OpenOption; @@ -53,6 +54,7 @@ import org.apache.commons.compress.utils.CharsetNames; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.InputStreamStatistics; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; +import org.apache.commons.io.Charsets; import 
org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin; import org.apache.commons.io.build.AbstractStreamBuilder; import org.apache.commons.io.input.CountingInputStream; @@ -121,13 +123,15 @@ public class ZipFile implements Closeable { */ public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> { + static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; + private SeekableByteChannel seekableByteChannel; private boolean useUnicodeExtraFields = true; private boolean ignoreLocalFileHeader; public Builder() { - setCharset(StandardCharsets.UTF_8); - setCharsetDefault(StandardCharsets.UTF_8); + setCharset(DEFAULT_CHARSET); + setCharsetDefault(DEFAULT_CHARSET); } @SuppressWarnings("resource") // caller closes @@ -151,7 +155,7 @@ public class ZipFile implements Closeable { actualDescription = path.toString(); } final boolean closeOnError = seekableByteChannel != null; - return new ZipFile(actualChannel, actualDescription, getCharset().name(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); + return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); } /** @@ -444,6 +448,16 @@ public class ZipFile implements Closeable { private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); + /** + * Creates a new Builder. + * + * @return a new Builder. + * @since 1.26.0 + */ + public static Builder builder() { + return new Builder(); + } + /** * Closes a ZIP file quietly; throwing no IOException, does nothing on null input. * @@ -481,7 +495,7 @@ public class ZipFile implements Closeable { * Defaults to UTF-8. * </p> */ - private final String encoding; + private final Charset encoding; /** * The ZIP encoding to use for file names and the file comment. 
@@ -531,16 +545,6 @@ public class ZipFile implements Closeable { private long firstLocalFileHeaderOffset; - /** - * Creates a new Builder. - * - * @return a new Builder. - * @since 1.26.0 - */ - public static Builder builder() { - return new Builder(); - } - /** * Opens the given file for reading, assuming "UTF8" for file names. * @@ -705,6 +709,31 @@ public class ZipFile implements Closeable { this(channel, "a SeekableByteChannel", encoding, true); } + private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields, + final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException { + this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel; + this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET); + this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); + this.useUnicodeExtraFields = useUnicodeExtraFields; + this.archive = channel; + boolean success = false; + try { + final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory(); + if (!ignoreLocalFileHeader) { + resolveLocalFileHeaderData(entriesWithoutUTF8Flag); + } + fillNameMap(); + success = true; + } catch (final IOException e) { + throw new IOException("Error reading Zip content from " + channelDescription, e); + } finally { + this.closed = !success; + if (!success && closeOnError) { + org.apache.commons.io.IOUtils.closeQuietly(archive); + } + } + } + /** * Opens the given channel for reading, assuming the specified encoding for file names. 
* <p> @@ -755,27 +784,7 @@ public class ZipFile implements Closeable { private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields, final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException { - this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel; - this.encoding = encoding; - this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); - this.useUnicodeExtraFields = useUnicodeExtraFields; - this.archive = channel; - boolean success = false; - try { - final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory(); - if (!ignoreLocalFileHeader) { - resolveLocalFileHeaderData(entriesWithoutUTF8Flag); - } - fillNameMap(); - success = true; - } catch (final IOException e) { - throw new IOException("Error reading Zip content from " + channelDescription, e); - } finally { - this.closed = !success; - if (!success && closeOnError) { - org.apache.commons.io.IOUtils.closeQuietly(archive); - } - } + this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); } /** @@ -914,7 +923,7 @@ public class ZipFile implements Closeable { * @return null if using the platform's default character encoding. 
*/ public String getEncoding() { - return encoding; + return encoding.name(); } /** diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java index a4697ce1f..6a9c1e4df 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java @@ -111,7 +111,8 @@ public class ZipEncodingTest { public void testIsUTF8() { assertTrue(ZipEncodingHelper.isUTF8(CharsetNames.UTF_8)); assertTrue(ZipEncodingHelper.isUTF8("UTF8")); - Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8(null)); + Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((Charset) null)); + Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((String) null)); } @Test