Author: bodewig
Date: Fri Mar 23 13:47:59 2012
New Revision: 1304345

URL: http://svn.apache.org/viewvc?rev=1304345&view=rev
Log:
properly parse non-ASCII content in PAX extension headers. COMPRESS-184

Modified: commons/proper/compress/trunk/src/changes/changes.xml commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java Modified: commons/proper/compress/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1304345&r1=1304344&r2=1304345&view=diff ============================================================================== --- commons/proper/compress/trunk/src/changes/changes.xml (original) +++ commons/proper/compress/trunk/src/changes/changes.xml Fri Mar 23 13:47:59 2012 @@ -46,6 +46,10 @@ The <action> type attribute can be add,u <body> <release version="1.4" date="unreleased" description="Release 1.4"> + <action issue="COMPRESS-184" type="fix" date="2012-03-22"> + TarArchiveInputStream failed to parse PAX headers that + contained non-ASCII characters. 
+ </action> <action issue="COMPRESS-182" type="update" date="2012-03-02"> The tar package can now write archives that use star/GNU/BSD extensions or use the POSIX/PAX variant to store numeric Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1304345&r1=1304344&r2=1304345&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java Fri Mar 23 13:47:59 2012 @@ -23,10 +23,10 @@ package org.apache.commons.compress.archivers.tar; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.Reader; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; @@ -311,57 +311,44 @@ public class TarArchiveInputStream exten } private void paxHeaders() throws IOException{ - Reader br = new InputStreamReader(this, "UTF-8") { - @Override - public void close() { - // make sure GC doesn't close "this" before we are done - } - }; - Map<String, String> headers = null; - try { - headers = parsePaxHeaders(br); - } finally { - // NO-OP but makes FindBugs happy - br.close(); - } - + Map<String, String> headers = parsePaxHeaders(this); getNextEntry(); // Get the actual file entry applyPaxHeadersToCurrentEntry(headers); } - Map<String, String> parsePaxHeaders(Reader br) throws IOException { + Map<String, String> parsePaxHeaders(InputStream i) throws IOException { Map<String, String> headers = new HashMap<String, String>(); // Format is "length keyword=value\n"; while(true){ // get length int ch; int len 
= 0; int read = 0; - while((ch = br.read()) != -1){ + while((ch = i.read()) != -1) { read++; if (ch == ' '){ // End of length string // Get keyword - StringBuffer sb = new StringBuffer(); - while((ch = br.read()) != -1){ + ByteArrayOutputStream coll = new ByteArrayOutputStream(); + while((ch = i.read()) != -1) { read++; if (ch == '='){ // end of keyword - String keyword = sb.toString(); + String keyword = coll.toString("UTF-8"); // Get rest of entry - char[] cbuf = new char[len-read]; - int got = br.read(cbuf); + byte[] rest = new byte[len - read]; + int got = i.read(rest); if (got != len - read){ throw new IOException("Failed to read " + "Paxheader. Expected " + (len - read) - + " chars, read " + + " bytes, read " + got); } // Drop trailing NL - String value = new String(cbuf, 0, - len - read - 1); + String value = new String(rest, 0, + len - read - 1, "UTF-8"); headers.put(keyword, value); break; } - sb.append((char) ch); + coll.write((byte) ch); } break; // Processed single header } Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java?rev=1304345&r1=1304344&r2=1304345&view=diff ============================================================================== --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java (original) +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java Fri Mar 23 13:47:59 2012 @@ -18,9 +18,9 @@ package org.apache.commons.compress.archivers.tar; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.StringReader; import java.net.URI; import java.net.URL; import java.util.Calendar; @@ -36,7 +36,8 @@ public class TarArchiveInputStreamTest { 
@Test public void readSimplePaxHeader() throws Exception { Map<String, String> headers = new TarArchiveInputStream(null) - .parsePaxHeaders(new StringReader("30 atime=1321711775.972059463\n")); + .parsePaxHeaders(new ByteArrayInputStream("30 atime=1321711775.972059463\n" + .getBytes("UTF-8"))); assertEquals(1, headers.size()); assertEquals("1321711775.972059463", headers.get("atime")); } @@ -44,12 +45,24 @@ public class TarArchiveInputStreamTest { @Test public void readPaxHeaderWithEmbeddedNewline() throws Exception { Map<String, String> headers = new TarArchiveInputStream(null) - .parsePaxHeaders(new StringReader("28 comment=line1\nline2\nand3\n")); + .parsePaxHeaders(new ByteArrayInputStream("28 comment=line1\nline2\nand3\n" + .getBytes("UTF-8"))); assertEquals(1, headers.size()); assertEquals("line1\nline2\nand3", headers.get("comment")); } @Test + public void readNonAsciiPaxHeader() throws Exception { + String ae = "\u00e4"; + String line = "11 path="+ ae + "\n"; + assertEquals(11, line.getBytes("UTF-8").length); + Map<String, String> headers = new TarArchiveInputStream(null) + .parsePaxHeaders(new ByteArrayInputStream(line.getBytes("UTF-8"))); + assertEquals(1, headers.size()); + assertEquals(ae, headers.get("path")); + } + + @Test public void workaroundForBrokenTimeHeader() throws Exception { URL tar = getClass().getResource("/simple-aix-native-tar.tar"); TarArchiveInputStream in = null;