Author: bodewig Date: Sun Mar 18 17:16:51 2012 New Revision: 1302170 URL: http://svn.apache.org/viewvc?rev=1302170&view=rev Log: infrastructure for non-ASCII encoding of file names in tar. COMPRESS-183
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java Sun Mar 18 17:16:51 2012 @@ -19,11 +19,13 @@ package org.apache.commons.compress.archivers.tar; import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Date; import java.util.Locale; import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipEncoding; /** * This class represents an entry in a Tar archive. It consists @@ -177,7 +179,7 @@ public class TarArchiveEntry implements /** * Construct an empty entry and prepares the header values. */ - private TarArchiveEntry () { + private TarArchiveEntry() { this.magic = MAGIC_POSIX; this.version = VERSION_POSIX; this.name = ""; @@ -307,8 +309,30 @@ public class TarArchiveEntry implements * @throws IllegalArgumentException if any of the numeric fields have an invalid format */ public TarArchiveEntry(byte[] headerBuf) { + this(headerBuf, null); + } + + /** + * Construct an entry from an archive's header bytes. File is set + * to null. + * + * @param headerBuf The header bytes from a tar archive entry. + * @param encoding encoding to use for file names + * @since Commons Compress 1.4 + * @throws IllegalArgumentException if any of the numeric fields have an invalid format + */ + public TarArchiveEntry(byte[] headerBuf, ZipEncoding encoding) { this(); - parseTarHeader(headerBuf); + try { + parseTarHeader(headerBuf, encoding); + } catch (IOException ex) { + try { + parseTarHeader(headerBuf, encoding, true); + } catch (IOException ex2) { + // impossible + throw new RuntimeException(ex2); + } + } } /** @@ -865,9 +889,39 @@ public class TarArchiveEntry implements * @throws IllegalArgumentException if any of the numeric fields have an invalid format */ public void parseTarHeader(byte[] header) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING); + } catch (IOException ex) { + try { + parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true); + } catch (IOException ex2) { + // not really possible + throw new RuntimeException(ex2); + } + } + } + + /** + * Parse an entry's header information from a header buffer. + * + * @param header The tar entry header buffer to get information from. + * @param encoding encoding to use for file names + * @since Commons Compress 1.4 + * @throws IllegalArgumentException if any of the numeric fields + * have an invalid format + */ + public void parseTarHeader(byte[] header, ZipEncoding encoding) + throws IOException { + parseTarHeader(header, encoding, false); + } + + private void parseTarHeader(byte[] header, ZipEncoding encoding, + final boolean oldStyle) + throws IOException { int offset = 0; - name = TarUtils.parseName(header, offset, NAMELEN); + name = oldStyle ? TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); offset += NAMELEN; mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN); offset += MODELEN; @@ -881,15 +935,18 @@ public class TarArchiveEntry implements offset += MODTIMELEN; offset += CHKSUMLEN; linkFlag = header[offset++]; - linkName = TarUtils.parseName(header, offset, NAMELEN); + linkName = oldStyle ? TarUtils.parseName(header, offset, NAMELEN) + : TarUtils.parseName(header, offset, NAMELEN, encoding); offset += NAMELEN; magic = TarUtils.parseName(header, offset, MAGICLEN); offset += MAGICLEN; version = TarUtils.parseName(header, offset, VERSIONLEN); offset += VERSIONLEN; - userName = TarUtils.parseName(header, offset, UNAMELEN); + userName = oldStyle ? TarUtils.parseName(header, offset, UNAMELEN) + : TarUtils.parseName(header, offset, UNAMELEN, encoding); offset += UNAMELEN; - groupName = TarUtils.parseName(header, offset, GNAMELEN); + groupName = oldStyle ? TarUtils.parseName(header, offset, GNAMELEN) + : TarUtils.parseName(header, offset, GNAMELEN, encoding); offset += GNAMELEN; devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN); offset += DEVLEN; @@ -913,7 +970,9 @@ public class TarArchiveEntry implements } case FORMAT_POSIX: default: { - String prefix = TarUtils.parseName(header, offset, PREFIXLEN); + String prefix = oldStyle + ? TarUtils.parseName(header, offset, PREFIXLEN) + : TarUtils.parseName(header, offset, PREFIXLEN, encoding); // SunOS tar -E does not add / to directory names, so fix // up to be consistent if (isDirectory() && !name.endsWith("/")){ Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java Sun Mar 18 17:16:51 2012 @@ -33,6 +33,8 @@ import java.util.Map.Entry; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.utils.ArchiveUtils; /** @@ -52,6 +54,7 @@ public class TarArchiveInputStream exten private byte[] readBuf; protected final TarBuffer buffer; private TarArchiveEntry currEntry; + private final ZipEncoding encoding; /** * Constructor for TarInputStream. @@ -64,6 +67,16 @@ public class TarArchiveInputStream exten /** * Constructor for TarInputStream. * @param is the input stream to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveInputStream(InputStream is, String encoding) { + this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use * @param blockSize the block size to use */ public TarArchiveInputStream(InputStream is, int blockSize) { @@ -74,12 +87,38 @@ public class TarArchiveInputStream exten * Constructor for TarInputStream. * @param is the input stream to use * @param blockSize the block size to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveInputStream(InputStream is, int blockSize, + String encoding) { + this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use * @param recordSize the record size to use */ public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) { + this(is, blockSize, recordSize, null); + } + + /** + * Constructor for TarInputStream. + * @param is the input stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveInputStream(InputStream is, int blockSize, int recordSize, + String encoding) { this.buffer = new TarBuffer(is, blockSize, recordSize); this.readBuf = null; this.hasHitEOF = false; + this.encoding = ZipEncodingHelper.getZipEncoding(encoding); } /** @@ -196,7 +235,7 @@ public class TarArchiveInputStream exten } try { - currEntry = new TarArchiveEntry(headerBuf); + currEntry = new TarArchiveEntry(headerBuf, encoding); } catch (IllegalArgumentException e) { IOException ioe = new IOException("Error detected parsing the header"); ioe.initCause(e); Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Sun Mar 18 17:16:51 2012 @@ -26,6 +26,8 @@ import java.util.HashMap; import java.util.Map; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.CountingOutputStream; @@ -77,6 +79,8 @@ public class TarArchiveOutputStream exte private final OutputStream out; + private final ZipEncoding encoding; + /** * Constructor for TarInputStream. * @param os the output stream to use @@ -88,6 +92,16 @@ public class TarArchiveOutputStream exte /** * Constructor for TarInputStream. * @param os the output stream to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveOutputStream(OutputStream os, String encoding) { + this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + + /** + * Constructor for TarInputStream. + * @param os the output stream to use * @param blockSize the block size to use */ public TarArchiveOutputStream(OutputStream os, int blockSize) { @@ -98,10 +112,36 @@ public class TarArchiveOutputStream exte * Constructor for TarInputStream. * @param os the output stream to use * @param blockSize the block size to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveOutputStream(OutputStream os, int blockSize, + String encoding) { + this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); + } + + /** + * Constructor for TarInputStream. + * @param os the output stream to use + * @param blockSize the block size to use * @param recordSize the record size to use */ public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) { + this(os, blockSize, recordSize, null); + } + + /** + * Constructor for TarInputStream. + * @param os the output stream to use + * @param blockSize the block size to use + * @param recordSize the record size to use + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + */ + public TarArchiveOutputStream(OutputStream os, int blockSize, + int recordSize, String encoding) { out = new CountingOutputStream(os); + this.encoding = ZipEncodingHelper.getZipEncoding(encoding); this.buffer = new TarBuffer(out, blockSize, recordSize); this.assemLen = 0; Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Sun Mar 18 17:16:51 2012 @@ -18,7 +18,11 @@ */ package org.apache.commons.compress.archivers.tar; +import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; +import org.apache.commons.compress.archivers.zip.ZipEncoding; +import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; /** * This class provides static utility methods to work with byte streams. @@ -30,6 +34,9 @@ public class TarUtils { private static final int BYTE_MASK = 255; + static final ZipEncoding DEFAULT_ENCODING = + ZipEncodingHelper.getZipEncoding(null); + /** Private constructor to prevent instantiation of this utility class. */ private TarUtils(){ } @@ -211,6 +218,19 @@ public class TarUtils { * @return The entry name. */ public static String parseName(byte[] buffer, final int offset, final int length) { + try { + return parseName(buffer, offset, length, DEFAULT_ENCODING); + } catch (IOException ex) { + return parseNameFallback(buffer, offset, length); + } + } + + /* + * Used if default encoding cannot encode name and no explicit + * encoding has been specified. + */ + private static String parseNameFallback(byte[] buffer, final int offset, + final int length) { StringBuffer result = new StringBuffer(length); int end = offset + length; @@ -226,7 +246,38 @@ public class TarUtils { } /** - * Copy a name (StringBuffer) into a buffer. + * Parse an entry name from a buffer. + * Parsing stops when a NUL is found + * or the buffer length is reached. + * + * @param buffer The buffer from which to parse. + * @param offset The offset into the buffer from which to parse. + * @param length The maximum number of bytes to parse. + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + * @return The entry name. + */ + public static String parseName(byte[] buffer, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + + int len = length; + for (; len > 0; len--) { + if (buffer[offset + len - 1] != 0) { + break; + } + } + if (len > 0) { + byte[] b = new byte[len]; + System.arraycopy(buffer, offset, b, 0, len); + return encoding.decode(b); + } + return ""; + } + + /** + * Copy a name into a buffer. * Copies characters from the name into the buffer * starting at the specified offset. * If the buffer is longer than the name, the buffer @@ -241,6 +292,20 @@ public class TarUtils { * @return The updated offset, i.e. offset + length */ public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { + try { + return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); + } catch (IOException ex) { + return formatNameBytesFallback(name, buf, offset, length); + } + } + + /* + * Used if default encoding cannot format name and no explicit encoding + * has been specified. + */ + private static int formatNameBytesFallback(String name, byte[] buf, + final int offset, + final int length) { int i; // copy until end of input or output is reached. @@ -257,6 +322,43 @@ public class TarUtils { } /** + * Copy a name (StringBuffer) into a buffer. + * Copies characters from the name into the buffer + * starting at the specified offset. + * If the buffer is longer than the name, the buffer + * is filled with trailing NULs. + * If the name is longer than the buffer, + * the output is truncated. + * + * @param name The header name from which to copy the characters. + * @param buf The buffer where the name is to be stored. + * @param offset The starting offset into the buffer + * @param length The maximum number of header bytes to copy. + * @param encoding name of the encoding to use for file names + * @since Commons Compress 1.4 + * @return The updated offset, i.e. offset + length + */ + public static int formatNameBytes(String name, byte[] buf, final int offset, + final int length, + final ZipEncoding encoding) + throws IOException { + int len = name.length(); + ByteBuffer b = encoding.encode(name); + while (b.limit() > length && len > 0) { + b = encoding.encode(name.substring(0, --len)); + } + final int limit = b.limit(); + System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); + + // Pad any remaining output bytes with NUL + for (int i = limit; i < length; ++i) { + buf[offset + i] = 0; + } + + return offset + length; + } + + /** * Fill buffer with unsigned octal number, padded with leading zeroes. * * @param value number to convert to octal - treated as unsigned Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java Sun Mar 18 17:16:51 2012 @@ -41,7 +41,7 @@ import java.nio.ByteBuffer; * <p>All implementations should implement this interface in a * reentrant way.</p> */ -interface ZipEncoding { +public interface ZipEncoding { /** * Check, whether the given string may be losslessly encoded using this * encoding. Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java Sun Mar 18 17:16:51 2012 @@ -27,7 +27,7 @@ import java.util.Map; /** * Static helper functions for robustly encoding filenames in zip files. */ -abstract class ZipEncodingHelper { +public abstract class ZipEncodingHelper { /** * A class, which holds the high characters of a simple encoding @@ -207,7 +207,7 @@ abstract class ZipEncodingHelper { * the platform's default encoding. * @return A zip encoding for the given encoding name. */ - static ZipEncoding getZipEncoding(String name) { + public static ZipEncoding getZipEncoding(String name) { // fallback encoding is good enough for utf-8. if (isUTF8(name)) { Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java?rev=1302170&r1=1302169&r2=1302170&view=diff ============================================================================== --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java (original) +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java Sun Mar 18 17:16:51 2012 @@ -117,7 +117,8 @@ public final class TarTestCase extends A public void testCOMPRESS114() throws Exception { final File input = getFile("COMPRESS-114.tar"); final InputStream is = new FileInputStream(input); - final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("tar", is); + final ArchiveInputStream in = new TarArchiveInputStream(is, + "iso-8859-1"); TarArchiveEntry entry = (TarArchiveEntry)in.getNextEntry(); assertEquals("3\u00b1\u00b1\u00b1F06\u00b1W2345\u00b1ZB\u00b1la\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1BLA", entry.getName()); entry = (TarArchiveEntry)in.getNextEntry();