Author: bodewig Date: Sat Mar 24 05:32:31 2012 New Revision: 1304709 URL: http://svn.apache.org/viewvc?rev=1304709&view=rev Log: optionally use PAX headers when writing non-ASCII file names. COMPRESS-183
Modified: commons/proper/compress/trunk/src/changes/changes.xml commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Modified: commons/proper/compress/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/changes/changes.xml (original) +++ commons/proper/compress/trunk/src/changes/changes.xml Sat Mar 24 05:32:31 2012 @@ -46,6 +46,17 @@ The <action> type attribute can be add,u <body> <release version="1.4" date="unreleased" description="Release 1.4"> + <action issue="COMPRESS-183" type="fix" date="2012-03-24"> + The tar package now allows the encoding of file names to be + specified and can optionally use PAX extension headers to + write non-ASCII file names. + The stream classes now write (or expect to read) archives that + use the platform's native encoding for file names. Apache + Commons Compress 1.3 used to strip everything but the lower + eight bits of each character which effectively only worked for + ASCII and ISO-8859-1 file names. + This new default behavior is a breaking change. + </action> <action issue="COMPRESS-184" type="fix" date="2012-03-23"> TarArchiveInputStream failed to parse PAX headers that contained non-ASCII characters. Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Sat Mar 24 05:32:31 2012 @@ -81,6 +81,10 @@ public class TarArchiveOutputStream exte private final ZipEncoding encoding; + private boolean addPaxHeadersForNonAsciiNames = false; + private static final ZipEncoding ASCII = + ZipEncodingHelper.getZipEncoding("ASCII"); + /** * Constructor for TarInputStream. * @param os the output stream to use @@ -172,6 +176,13 @@ public class TarArchiveOutputStream exte this.bigNumberMode = bigNumberMode; } + /** + * Whether to add a PAX extension header for non-ASCII file names. + * @since Apache Commons Compress 1.4 + */ + public void setAddPaxHeadersForNonAsciiNames(boolean b) { + addPaxHeadersForNonAsciiNames = b; + } @Deprecated @Override @@ -254,11 +265,14 @@ public class TarArchiveOutputStream exte } TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; Map<String, String> paxHeaders = new HashMap<String, String>(); - final byte[] nameBytes = encoding.encode(entry.getName()).array(); + final String entryName = entry.getName(); + final byte[] nameBytes = encoding.encode(entryName).array(); + boolean paxHeaderContainsPath = false; if (nameBytes.length >= TarConstants.NAMELEN) { if (longFileMode == LONGFILE_POSIX) { - paxHeaders.put("path", entry.getName()); + paxHeaders.put("path", entryName); + paxHeaderContainsPath = true; } else if (longFileMode == LONGFILE_GNU) { // create a TarEntry for the LongLink, the contents // of which are the entry's name @@ -271,7 +285,7 @@ public class TarArchiveOutputStream exte write(0); // NUL terminator closeArchiveEntry(); } else if (longFileMode != LONGFILE_TRUNCATE) { - throw new RuntimeException("file name '" + entry.getName() + throw new RuntimeException("file name '" + entryName + "' is too long ( > " + TarConstants.NAMELEN + " bytes)"); } @@ -283,8 +297,13 @@ public class TarArchiveOutputStream exte failForBigNumbers(entry); } + if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath + && !ASCII.canEncode(entryName)) { + paxHeaders.put("path", entryName); + } + if (paxHeaders.size() > 0) { - writePaxHeaders(entry.getName(), paxHeaders); + writePaxHeaders(entryName, paxHeaders); } entry.writeEntryHeader(recordBuf, encoding, @@ -298,7 +317,7 @@ public class TarArchiveOutputStream exte } else { currSize = entry.getSize(); } - currName = entry.getName(); + currName = entryName; haveUnclosedEntry = true; } @@ -426,7 +445,7 @@ public class TarArchiveOutputStream exte */ void writePaxHeaders(String entryName, Map<String, String> headers) throws IOException { - String name = "./PaxHeaders.X/" + entryName; + String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); if (name.length() >= TarConstants.NAMELEN) { name = name.substring(0, TarConstants.NAMELEN - 1); } @@ -461,6 +480,18 @@ public class TarArchiveOutputStream exte closeArchiveEntry(); } + private String stripTo7Bits(String name) { + final int length = name.length(); + StringBuffer result = new StringBuffer(length); + for (int i = 0; i < length; i++) { + char stripped = (char) (name.charAt(i) & 0x7F); + if (stripped != 0) { // would be read as Trailing null + result.append(stripped); + } + } + return result.toString(); + } + /** * Write an EOF (end of archive) record to the tar archive. * An EOF record consists of a record of all zeros. Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java (original) +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Sat Mar 24 05:32:31 2012 @@ -274,4 +274,27 @@ public class TarArchiveOutputStreamTest } } + public void testWriteNonAsciiPathNamePaxHeader() throws Exception { + String n = "\u00e4"; + TarArchiveEntry t = new TarArchiveEntry(n); + t.setSize(10 * 1024); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + byte[] data = bos.toByteArray(); + assertEquals("11 path=" + n + "\n", + new String(data, 512, 11, "UTF-8")); + FileOutputStream fos = new FileOutputStream("/tmp/x"); + fos.write(data); + fos.close(); + TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + } + } \ No newline at end of file