Repository: commons-compress
Updated Branches:
  refs/heads/master 7a3889d47 -> fd69d8ca2


COMPRESS-469 add lenient flag that allows accepting some broken tars


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/fd69d8ca
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/fd69d8ca
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/fd69d8ca

Branch: refs/heads/master
Commit: fd69d8ca2212d373a5a8f220530e8c0fa17ddf3b
Parents: 7a3889d
Author: Stefan Bodewig <bode...@apache.org>
Authored: Sat Nov 10 21:28:54 2018 +0100
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Sat Nov 10 21:28:54 2018 +0100

----------------------------------------------------------------------
 src/changes/changes.xml                         |  4 ++
 .../compress/archivers/tar/TarArchiveEntry.java | 55 ++++++++++++++++----
 .../archivers/tar/TarArchiveInputStream.java    | 31 ++++++++++-
 .../commons/compress/archivers/TarTestCase.java |  9 ++++
 4 files changed, 88 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fd69d8ca/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index c220dc0..5b9619a 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -54,6 +54,10 @@ The <action> type attribute can be add,update,fix,remove.
         cost of potentially missing important information. See the
         javadocs of the ZipFile class for details.
       </action>
+      <action issue="COMPRESS-469" type="add" date="2018-11-10">
+        TarArchiveInputStream has a new constructor-arg lenient that
+        can be used to accept certain broken archives.
+      </action>
     </release>
     <release version="1.18" date="2018-08-16"
              description="Release 1.18">

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fd69d8ca/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
index b6b735a..e5c3a06 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
@@ -146,6 +146,13 @@ import org.apache.commons.compress.utils.ArchiveUtils;
 public class TarArchiveEntry implements ArchiveEntry, TarConstants {
     private static final TarArchiveEntry[] EMPTY_TAR_ARCHIVE_ENTRIES = new 
TarArchiveEntry[0];
 
+    /**
+     * Value used to indicate unknown mode, user/groupids, device numbers and 
modTime when parsing a file in lenient
+     * mode and the archive contains illegal fields.
+     * @since 1.19
+     */
+    private static final long UNKNOWN = -1l;
+
     /** The entry's name. */
     private String name = "";
 
@@ -400,8 +407,25 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants {
      */
     public TarArchiveEntry(final byte[] headerBuf, final ZipEncoding encoding)
         throws IOException {
+        this(headerBuf, encoding, false);
+    }
+
+    /**
+     * Construct an entry from an archive's header bytes. File is set
+     * to null.
+     *
+     * @param headerBuf The header bytes from a tar archive entry.
+     * @param encoding encoding to use for file names
+     * @param lenient when set to true illegal values for group/userid, mode, 
device numbers and timestamp will be
+     * ignored and the fields set to {@link #UNKNOWN}. When set to false such 
illegal fields cause an exception instead.
+     * @since 1.19
+     * @throws IllegalArgumentException if any of the numeric fields have an 
invalid format
+     * @throws IOException on error
+     */
+    public TarArchiveEntry(final byte[] headerBuf, final ZipEncoding encoding, 
boolean lenient)
+        throws IOException {
         this(false);
-        parseTarHeader(headerBuf, encoding);
+        parseTarHeader(headerBuf, encoding, false, lenient);
     }
 
     /**
@@ -1243,7 +1267,7 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants {
             parseTarHeader(header, TarUtils.DEFAULT_ENCODING);
         } catch (final IOException ex) { // NOSONAR
             try {
-                parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true);
+                parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true, false);
             } catch (final IOException ex2) {
                 // not really possible
                 throw new RuntimeException(ex2); //NOSONAR
@@ -1263,26 +1287,26 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants {
      */
     public void parseTarHeader(final byte[] header, final ZipEncoding encoding)
         throws IOException {
-        parseTarHeader(header, encoding, false);
+        parseTarHeader(header, encoding, false, false);
     }
 
     private void parseTarHeader(final byte[] header, final ZipEncoding 
encoding,
-                                final boolean oldStyle)
+                                final boolean oldStyle, final boolean lenient)
         throws IOException {
         int offset = 0;
 
         name = oldStyle ? TarUtils.parseName(header, offset, NAMELEN)
             : TarUtils.parseName(header, offset, NAMELEN, encoding);
         offset += NAMELEN;
-        mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN);
+        mode = (int) parseOctalOrBinary(header, offset, MODELEN, lenient);
         offset += MODELEN;
-        userId = (int) TarUtils.parseOctalOrBinary(header, offset, UIDLEN);
+        userId = (int) parseOctalOrBinary(header, offset, UIDLEN, lenient);
         offset += UIDLEN;
-        groupId = (int) TarUtils.parseOctalOrBinary(header, offset, GIDLEN);
+        groupId = (int) parseOctalOrBinary(header, offset, GIDLEN, lenient);
         offset += GIDLEN;
         size = TarUtils.parseOctalOrBinary(header, offset, SIZELEN);
         offset += SIZELEN;
-        modTime = TarUtils.parseOctalOrBinary(header, offset, MODTIMELEN);
+        modTime = parseOctalOrBinary(header, offset, MODTIMELEN, lenient);
         offset += MODTIMELEN;
         checkSumOK = TarUtils.verifyCheckSum(header);
         offset += CHKSUMLEN;
@@ -1301,9 +1325,9 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants {
             : TarUtils.parseName(header, offset, GNAMELEN, encoding);
         offset += GNAMELEN;
         if (linkFlag == LF_CHR || linkFlag == LF_BLK) {
-            devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, 
DEVLEN);
+            devMajor = (int) parseOctalOrBinary(header, offset, DEVLEN, 
lenient);
             offset += DEVLEN;
-            devMinor = (int) TarUtils.parseOctalOrBinary(header, offset, 
DEVLEN);
+            devMinor = (int) parseOctalOrBinary(header, offset, DEVLEN, 
lenient);
             offset += DEVLEN;
         } else {
             offset += 2 * DEVLEN;
@@ -1350,6 +1374,17 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants {
         }
     }
 
+    private long parseOctalOrBinary(byte[] header, int offset, int length, 
boolean lenient) {
+        if (lenient) {
+            try {
+                return TarUtils.parseOctalOrBinary(header, offset, length);
+            } catch (IllegalArgumentException ex) {
+                return UNKNOWN;
+            }
+        }
+        return TarUtils.parseOctalOrBinary(header, offset, length);
+    }
+
     /**
      * Strips Windows' drive letter as well as any leading slashes,
      * turns path separators into forward slahes.

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fd69d8ca/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index daaf729..7aeb6dd 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -80,6 +80,8 @@ public class TarArchiveInputStream extends ArchiveInputStream 
{
     // the global PAX header
     private Map<String, String> globalPaxHeaders = new HashMap<>();
 
+    private final boolean lenient;
+
     /**
      * Constructor for TarInputStream.
      * @param is the input stream to use
@@ -91,6 +93,17 @@ public class TarArchiveInputStream extends 
ArchiveInputStream {
     /**
      * Constructor for TarInputStream.
      * @param is the input stream to use
+     * @param lenient when set to true illegal values for group/userid, mode, 
device numbers and timestamp will be
+     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such 
illegal fields cause an exception instead.
+     * @since 1.19
+     */
+    public TarArchiveInputStream(final InputStream is, boolean lenient) {
+        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, 
null, lenient);
+    }
+
+    /**
+     * Constructor for TarInputStream.
+     * @param is the input stream to use
      * @param encoding name of the encoding to use for file names
      * @since 1.4
      */
@@ -140,12 +153,28 @@ public class TarArchiveInputStream extends 
ArchiveInputStream {
      */
     public TarArchiveInputStream(final InputStream is, final int blockSize, 
final int recordSize,
                                  final String encoding) {
+        this(is, blockSize, recordSize, encoding, false);
+    }
+
+    /**
+     * Constructor for TarInputStream.
+     * @param is the input stream to use
+     * @param blockSize the block size to use
+     * @param recordSize the record size to use
+     * @param encoding name of the encoding to use for file names
+     * @param lenient when set to true illegal values for group/userid, mode, 
device numbers and timestamp will be
+     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such 
illegal fields cause an exception instead.
+     * @since 1.19
+     */
+    public TarArchiveInputStream(final InputStream is, final int blockSize, 
final int recordSize,
+                                 final String encoding, boolean lenient) {
         this.is = is;
         this.hasHitEOF = false;
         this.encoding = encoding;
         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
         this.recordSize = recordSize;
         this.blockSize = blockSize;
+        this.lenient = lenient;
     }
 
     /**
@@ -280,7 +309,7 @@ public class TarArchiveInputStream extends 
ArchiveInputStream {
         }
 
         try {
-            currEntry = new TarArchiveEntry(headerBuf, zipEncoding);
+            currEntry = new TarArchiveEntry(headerBuf, zipEncoding, lenient);
         } catch (final IllegalArgumentException e) {
             throw new IOException("Error detected parsing the header", e);
         }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fd69d8ca/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java 
b/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java
index 4792583..e694927 100644
--- a/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java
+++ b/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java
@@ -330,4 +330,13 @@ public final class TarTestCase extends AbstractTestCase {
         in.close();
     }
 
+    @Test
+    public void testCOMPRESS178Lenient() throws Exception {
+        final File input = getFile("COMPRESS-178.tar");
+        final InputStream is = new FileInputStream(input);
+        try (final ArchiveInputStream in = new TarArchiveInputStream(is, 
true)) {
+            in.getNextEntry();
+        }
+    }
+
 }

Reply via email to