COMPRESS-410 Remove non-NIO character set encoders. As a special case, the UTF-8 encoder replaces malformed or unmappable input with '?'. This replacement is required for backward compatibility with the behavior of the previous implementation.
Signed-off-by: Simon Spero <sesunc...@gmail.com> (cherry picked from commit 1987719) Signed-off-by: Simon Spero <sesunc...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/db586bae Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/db586bae Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/db586bae Branch: refs/heads/master Commit: db586baee29fc90f79898e9a274cc1bd585b5f53 Parents: cb590b3 Author: Simon Spero <sesunc...@gmail.com> Authored: Fri Jun 16 20:17:13 2017 -0400 Committer: Stefan Bodewig <bode...@apache.org> Committed: Wed Jul 5 16:30:00 2017 +0200 ---------------------------------------------------------------------- .../compress/archivers/zip/NioZipEncoding.java | 80 ++++++++++++++------ 1 file changed, 55 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/db586bae/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java index ffd2efd..6f0306b 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; @@ -30,54 +31,84 @@ import java.nio.charset.CodingErrorAction; /** * A ZipEncoding, which uses a java.nio {@link * java.nio.charset.Charset Charset} to encode 
names. - * - * <p>This implementation works for all cases under java-1.5 or - * later. However, in java-1.4, some charsets don't have a java.nio - * implementation, most notably the default ZIP encoding Cp437.</p> - * * <p>The methods of this class are reentrant.</p> * @Immutable */ -class NioZipEncoding implements ZipEncoding { +class NioZipEncoding implements ZipEncoding,HasCharset { + private final Charset charset; + private boolean useReplacement= false; + private static final byte[] REPLACEMENT_BYTES = new byte[]{'?'}; + private static final String REPLACEMENT_STRING = "?"; /** * Construct an NIO based zip encoding, which wraps the given * charset. - * + * * @param charset The NIO charset to wrap. */ - public NioZipEncoding(final Charset charset) { + NioZipEncoding(final Charset charset) { this.charset = charset; } + NioZipEncoding(final Charset charset, boolean useReplacement) { + this(charset); + this.useReplacement = useReplacement; + + } + + @Override + public Charset getCharset() { + return charset; + } + /** - * @see - * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String) + * @see ZipEncoding#canEncode(java.lang.String) */ @Override public boolean canEncode(final String name) { - final CharsetEncoder enc = this.charset.newEncoder(); - enc.onMalformedInput(CodingErrorAction.REPORT); - enc.onUnmappableCharacter(CodingErrorAction.REPORT); + final CharsetEncoder enc = newEncoder(); return enc.canEncode(name); } + private CharsetEncoder newEncoder() { + if (useReplacement) { + return charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith(REPLACEMENT_BYTES); + } else { + return charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + } + } + + private CharsetDecoder newDecoder() { + if (!useReplacement) { + return this.charset.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + 
.onUnmappableCharacter(CodingErrorAction.REPORT); + } else { + return charset.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) + .replaceWith(REPLACEMENT_STRING); + } + } + + /** - * @see - * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String) + * @see ZipEncoding#encode(java.lang.String) */ @Override public ByteBuffer encode(final String name) { - final CharsetEncoder enc = this.charset.newEncoder(); - - enc.onMalformedInput(CodingErrorAction.REPORT); - enc.onUnmappableCharacter(CodingErrorAction.REPORT); + final CharsetEncoder enc = newEncoder(); final CharBuffer cb = CharBuffer.wrap(name); - ByteBuffer out = ByteBuffer.allocate(name.length() - + (name.length() + 1) / 2); + int estimatedSize = (int) Math.ceil(name.length() * enc.averageBytesPerChar()); + ByteBuffer out = ByteBuffer.allocate(estimatedSize); while (cb.remaining() > 0) { final CoderResult res = enc.encode(cb, out,true); @@ -114,13 +145,12 @@ class NioZipEncoding implements ZipEncoding { /** * @see - * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[]) + * ZipEncoding#decode(byte[]) */ @Override public String decode(final byte[] data) throws IOException { - return this.charset.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT) + return newDecoder() .decode(ByteBuffer.wrap(data)).toString(); } + }