Repository: commons-compress
Updated Branches:
  refs/heads/master 02e0f9ae6 -> af2da2e15


COMPRESS-345 add support for GNU sparse files using PAX dialects


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/af2da2e1
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/af2da2e1
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/af2da2e1

Branch: refs/heads/master
Commit: af2da2e151a8c76e217bc239616174cafbb702ec
Parents: 02e0f9a
Author: Stefan Bodewig <bode...@apache.org>
Authored: Wed Mar 23 18:29:33 2016 +0100
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Wed Mar 23 18:29:33 2016 +0100

----------------------------------------------------------------------
 src/changes/changes.xml                         |   4 ++
 .../compress/archivers/tar/TarArchiveEntry.java |  57 +++++++++++++++++--
 .../archivers/tar/TarArchiveInputStream.java    |  19 +++++--
 .../compress/archivers/tar/SparseFilesTest.java |  27 +++++++++
 .../compress/archivers/tar/TarLister.java       |   3 +
 src/test/resources/pax_gnu_sparse.tar           | Bin 0 -> 20480 bytes
 6 files changed, 102 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 8efd3d3..8abb6e8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -44,6 +44,10 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
     <release version="1.11" date="not released, yet"
              description="Release 1.11">
+      <action issue="COMPRESS-345" type="add" date="2016-03-23">
+        GNU sparse files using one of the PAX formats are now
+        detected, but cannot be extracted.
+      </action>
       <action issue="COMPRESS-344" type="fix" date="2016-03-22">
         ArArchiveInputStream can now read GNU extended names that are
         terminated with a NUL byte rather than a linefeed.

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
index 7945706..1578a1c 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Date;
 import java.util.Locale;
+import java.util.Map;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipEncoding;
@@ -195,6 +196,9 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
     /** The entry's real size in case of a sparse file. */
     private long realSize;
 
+    /** is this entry a GNU sparse entry using one of the PAX formats? */
+    private boolean paxGNUSparse;
+
     /** The entry's file reference */
     private final File file;
 
@@ -728,10 +732,10 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
     }
 
     /**
-     * Indicates in case of a sparse file if an extension sparse header
-     * follows.
+     * Indicates in case of an oldgnu sparse file if an extension
+     * sparse header follows.
      *
-     * @return true if an extension sparse header follows.
+     * @return true if an extension oldgnu sparse header follows.
      */
     public boolean isExtended() {
         return isExtended;
@@ -747,15 +751,36 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
     }
 
     /**
-     * Indicate if this entry is a GNU sparse block
+     * Indicate if this entry is a GNU sparse block.
      *
      * @return true if this is a sparse extension provided by GNU tar
      */
     public boolean isGNUSparse() {
+        return isOldGNUSparse() || isPaxGNUSparse();
+    }
+
+    /**
+     * Indicate if this entry is a GNU sparse block using the oldgnu format.
+     *
+     * @return true if this entry's link flag marks it as an oldgnu sparse entry
+     * @since 1.11
+     */
+    public boolean isOldGNUSparse() {
         return linkFlag == LF_GNUTYPE_SPARSE;
     }
 
     /**
+     * Indicate if this entry is a GNU sparse block using one of the
+     * PAX formats.
+     *
+     * @return true if this entry was flagged as GNU sparse by its PAX headers
+     * @since 1.11
+     */
+    public boolean isPaxGNUSparse() {
+        return paxGNUSparse;
+    }
+
+    /**
      * Indicate if this entry is a GNU long linkname block
      *
      * @return true if this is a long name extension provided by GNU tar
@@ -885,6 +910,15 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
     }
 
     /**
+     * Check whether this is a sparse entry.
+     * @return true if this entry is a GNU sparse entry (oldgnu or PAX format)
+     * @since 1.11
+     */
+    public boolean isSparse() {
+        return isGNUSparse();
+    }
+
+    /**
      * If this entry represents a file, and the file is a directory, return
      * an array of TarEntries for this entry's children.
      *
@@ -1169,5 +1203,20 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry {
         }
         return 0;
     }
+
+    void fillGNUSparse0xData(Map<String, String> headers) { // PAX GNU sparse versions 0.0 and 0.1
+        paxGNUSparse = true;
+        realSize = Long.parseLong(headers.get("GNU.sparse.size")); // realSize is a long; sizes may exceed Integer.MAX_VALUE
+        if (headers.containsKey("GNU.sparse.name")) {
+            // version 0.1
+            name = headers.get("GNU.sparse.name");
+        }
+    }
+
+    void fillGNUSparse1xData(Map<String, String> headers) { // PAX GNU sparse version 1.0
+        paxGNUSparse = true;
+        realSize = Long.parseLong(headers.get("GNU.sparse.realsize")); // realSize is a long; sizes may exceed Integer.MAX_VALUE
+        name = headers.get("GNU.sparse.name");
+    }
 }
 

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 118bf7a..becb9a4 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -314,8 +314,8 @@ public class TarArchiveInputStream extends ArchiveInputStream {
             paxHeaders();
         }
 
-        if (currEntry.isGNUSparse()){ // Process sparse files
-            readGNUSparse();
+        if (currEntry.isOldGNUSparse()){ // Process sparse files
+            readOldGNUSparse();
         }
 
         // If the size of the next element in the archive has changed
@@ -434,6 +434,9 @@ public class TarArchiveInputStream extends ArchiveInputStream {
         applyPaxHeadersToCurrentEntry(headers);
     }
 
+    // NOTE, using a Map here makes it impossible to ever support GNU
+    // sparse files using the PAX Format 0.0, see
+    // https://www.gnu.org/software/tar/manual/html_section/tar_92.html#SEC188
     Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
         Map<String, String> headers = new HashMap<String, String>();
         // Format is "length keyword=value\n";
@@ -492,6 +495,10 @@ public class TarArchiveInputStream extends ArchiveInputStream {
          * size
          * uid,uname
          * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those
+         *
+         * GNU sparse files use additional members, we use
+         * GNU.sparse.size to detect the 0.0 and 0.1 versions and
+         * GNU.sparse.realsize for 1.0.
          */
         for (Entry<String, String> ent : headers.entrySet()){
             String key = ent.getKey();
@@ -516,6 +523,10 @@ public class TarArchiveInputStream extends ArchiveInputStream {
                 currEntry.setDevMinor(Integer.parseInt(val));
             } else if ("SCHILY.devmajor".equals(key)){
                 currEntry.setDevMajor(Integer.parseInt(val));
+            } else if ("GNU.sparse.size".equals(key)) {
+                currEntry.fillGNUSparse0xData(headers);
+            } else if ("GNU.sparse.realsize".equals(key)) {
+                currEntry.fillGNUSparse1xData(headers);
             }
         }
     }
@@ -528,7 +539,7 @@ public class TarArchiveInputStream extends ArchiveInputStream {
      *
      * @todo Sparse files get not yet really processed.
      */
-    private void readGNUSparse() throws IOException {
+    private void readOldGNUSparse() throws IOException {
         /* we do not really process sparse files yet
         sparses = new ArrayList();
         sparses.addAll(currEntry.getSparses());
@@ -642,7 +653,7 @@ public class TarArchiveInputStream extends ArchiveInputStream {
     public boolean canReadEntryData(ArchiveEntry ae) {
         if (ae instanceof TarArchiveEntry) {
             TarArchiveEntry te = (TarArchiveEntry) ae;
-            return !te.isGNUSparse();
+            return !te.isSparse();
         }
         return false;
     }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
index e8427dc..ad22750 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
@@ -36,7 +36,9 @@ public class SparseFilesTest {
             tin = new TarArchiveInputStream(new FileInputStream(file));
             TarArchiveEntry ae = tin.getNextTarEntry();
             assertEquals("sparsefile", ae.getName());
+            assertTrue(ae.isOldGNUSparse());
             assertTrue(ae.isGNUSparse());
+            assertFalse(ae.isPaxGNUSparse());
             assertFalse(tin.canReadEntryData(ae));
         } finally {
             if (tin != null) {
@@ -44,5 +46,30 @@ public class SparseFilesTest {
             }
         }
     }
+
+    @Test
+    public void testPaxGNU() throws Throwable {
+        File file = getFile("pax_gnu_sparse.tar");
+        TarArchiveInputStream tin = null;
+        try {
+            tin = new TarArchiveInputStream(new FileInputStream(file));
+            assertPaxGNUEntry(tin, "0.0");
+            assertPaxGNUEntry(tin, "0.1");
+            assertPaxGNUEntry(tin, "1.0");
+        } finally {
+            if (tin != null) {
+                tin.close();
+            }
+        }
+    }
+
+    private void assertPaxGNUEntry(TarArchiveInputStream tin, String suffix) throws Throwable {
+        TarArchiveEntry ae = tin.getNextTarEntry();
+        assertEquals("sparsefile-" + suffix, ae.getName());
+        assertTrue(ae.isGNUSparse());
+        assertTrue(ae.isPaxGNUSparse());
+        assertFalse(ae.isOldGNUSparse());
+        assertFalse(tin.canReadEntryData(ae));
+    }
 }
 

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java
index ce78e7e..750d3d1 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java
@@ -73,6 +73,9 @@ public final class TarLister {
             }
             System.out.print(ae.getLinkName());
         }
+        if (ae.isSparse()) {
+            System.out.print(" (sparse)");
+        }
         System.out.println();
     }
             

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/resources/pax_gnu_sparse.tar
----------------------------------------------------------------------
diff --git a/src/test/resources/pax_gnu_sparse.tar b/src/test/resources/pax_gnu_sparse.tar
new file mode 100644
index 0000000..11fb4dd
Binary files /dev/null and b/src/test/resources/pax_gnu_sparse.tar differ

Reply via email to