Repository: commons-compress Updated Branches: refs/heads/master 02e0f9ae6 -> af2da2e15
COMPRESS-345 add support for GNU sparse files using PAX dialects Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/af2da2e1 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/af2da2e1 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/af2da2e1 Branch: refs/heads/master Commit: af2da2e151a8c76e217bc239616174cafbb702ec Parents: 02e0f9a Author: Stefan Bodewig <bode...@apache.org> Authored: Wed Mar 23 18:29:33 2016 +0100 Committer: Stefan Bodewig <bode...@apache.org> Committed: Wed Mar 23 18:29:33 2016 +0100 ---------------------------------------------------------------------- src/changes/changes.xml | 4 ++ .../compress/archivers/tar/TarArchiveEntry.java | 57 +++++++++++++++++-- .../archivers/tar/TarArchiveInputStream.java | 19 +++++-- .../compress/archivers/tar/SparseFilesTest.java | 27 +++++++++ .../compress/archivers/tar/TarLister.java | 3 + src/test/resources/pax_gnu_sparse.tar | Bin 0 -> 20480 bytes 6 files changed, 102 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 8efd3d3..8abb6e8 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -44,6 +44,10 @@ The <action> type attribute can be add,update,fix,remove. <body> <release version="1.11" date="not released, yet" description="Release 1.11"> + <action issue="COMPRESS-345" type="add" date="2016-03-23"> + GNU sparse files using one of the PAX formats are now + detected, but cannot be extracted. + </action> <action issue="COMPRESS-344" type="fix" date="2016-03-22"> ArArchiveInputStream can now read GNU extended names that are terminated with a NUL byte rather than a linefeed. http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java index 7945706..1578a1c 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.util.Date; import java.util.Locale; +import java.util.Map; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipEncoding; @@ -195,6 +196,9 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry { /** The entry's real size in case of a sparse file. */ private long realSize; + /** is this entry a GNU sparse entry using one of the PAX formats? */ + private boolean paxGNUSparse; + /** The entry's file reference */ private final File file; @@ -728,10 +732,10 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry { } /** - * Indicates in case of a sparse file if an extension sparse header - * follows. + * Indicates in case of an oldgnu sparse file if an extension + * sparse header follows. * - * @return true if an extension sparse header follows. + * @return true if an extension oldgnu sparse header follows. */ public boolean isExtended() { return isExtended; @@ -747,15 +751,36 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry { } /** - * Indicate if this entry is a GNU sparse block + * Indicate if this entry is a GNU sparse block. * * @return true if this is a sparse extension provided by GNU tar */ public boolean isGNUSparse() { + return isOldGNUSparse() || isPaxGNUSparse(); + } + + /** + * Indicate if this entry is a GNU sparse block using the oldgnu format. + * + * @return true if this is a sparse extension provided by GNU tar + * @since 1.11 + */ + public boolean isOldGNUSparse() { return linkFlag == LF_GNUTYPE_SPARSE; } /** + * Indicate if this entry is a GNU sparse block using one of the + * PAX formats. + * + * @return true if this is a sparse extension provided by GNU tar + * @since 1.11 + */ + public boolean isPaxGNUSparse() { + return paxGNUSparse; + } + + /** * Indicate if this entry is a GNU long linkname block * * @return true if this is a long name extension provided by GNU tar @@ -885,6 +910,15 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry { } /** + * Check whether this is a sparse entry. + * + * @since 1.11 + */ + public boolean isSparse() { + return isGNUSparse(); + } + + /** * If this entry represents a file, and the file is a directory, return * an array of TarEntries for this entry's children. * @@ -1169,5 +1203,20 @@ public class TarArchiveEntry implements TarConstants, ArchiveEntry { } return 0; } + + void fillGNUSparse0xData(Map<String, String> headers) { + paxGNUSparse = true; + realSize = Integer.parseInt(headers.get("GNU.sparse.size")); + if (headers.containsKey("GNU.sparse.name")) { + // version 0.1 + name = headers.get("GNU.sparse.name"); + } + } + + void fillGNUSparse1xData(Map<String, String> headers) { + paxGNUSparse = true; + realSize = Integer.parseInt(headers.get("GNU.sparse.realsize")); + name = headers.get("GNU.sparse.name"); + } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java index 118bf7a..becb9a4 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -314,8 +314,8 @@ public class TarArchiveInputStream extends ArchiveInputStream { paxHeaders(); } - if (currEntry.isGNUSparse()){ // Process sparse files - readGNUSparse(); + if (currEntry.isOldGNUSparse()){ // Process sparse files + readOldGNUSparse(); } // If the size of the next element in the archive has changed @@ -434,6 +434,9 @@ public class TarArchiveInputStream extends ArchiveInputStream { applyPaxHeadersToCurrentEntry(headers); } + // NOTE, using a Map here makes it impossible to ever support GNU + // sparse files using the PAX Format 0.0, see + // https://www.gnu.org/software/tar/manual/html_section/tar_92.html#SEC188 Map<String, String> parsePaxHeaders(InputStream i) throws IOException { Map<String, String> headers = new HashMap<String, String>(); // Format is "length keyword=value\n"; @@ -492,6 +495,10 @@ public class TarArchiveInputStream extends ArchiveInputStream { * size * uid,uname * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters for those + * + * GNU sparse files use additional members, we use + * GNU.sparse.size to detect the 0.0 and 0.1 versions and + * GNU.sparse.realsize for 1.0. */ for (Entry<String, String> ent : headers.entrySet()){ String key = ent.getKey(); @@ -516,6 +523,10 @@ public class TarArchiveInputStream extends ArchiveInputStream { currEntry.setDevMinor(Integer.parseInt(val)); } else if ("SCHILY.devmajor".equals(key)){ currEntry.setDevMajor(Integer.parseInt(val)); + } else if ("GNU.sparse.size".equals(key)) { + currEntry.fillGNUSparse0xData(headers); + } else if ("GNU.sparse.realsize".equals(key)) { + currEntry.fillGNUSparse1xData(headers); } } } @@ -528,7 +539,7 @@ public class TarArchiveInputStream extends ArchiveInputStream { * * @todo Sparse files get not yet really processed. */ - private void readGNUSparse() throws IOException { + private void readOldGNUSparse() throws IOException { /* we do not really process sparse files yet sparses = new ArrayList(); sparses.addAll(currEntry.getSparses()); @@ -642,7 +653,7 @@ public class TarArchiveInputStream extends ArchiveInputStream { public boolean canReadEntryData(ArchiveEntry ae) { if (ae instanceof TarArchiveEntry) { TarArchiveEntry te = (TarArchiveEntry) ae; - return !te.isGNUSparse(); + return !te.isSparse(); } return false; } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java index e8427dc..ad22750 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java @@ -36,7 +36,9 @@ public class SparseFilesTest { tin = new TarArchiveInputStream(new FileInputStream(file)); TarArchiveEntry ae = tin.getNextTarEntry(); assertEquals("sparsefile", ae.getName()); + assertTrue(ae.isOldGNUSparse()); assertTrue(ae.isGNUSparse()); + assertFalse(ae.isPaxGNUSparse()); assertFalse(tin.canReadEntryData(ae)); } finally { if (tin != null) { @@ -44,5 +46,30 @@ public class SparseFilesTest { } } } + + @Test + public void testPaxGNU() throws Throwable { + File file = getFile("pax_gnu_sparse.tar"); + TarArchiveInputStream tin = null; + try { + tin = new TarArchiveInputStream(new FileInputStream(file)); + assertPaxGNUEntry(tin, "0.0"); + assertPaxGNUEntry(tin, "0.1"); + assertPaxGNUEntry(tin, "1.0"); + } finally { + if (tin != null) { + tin.close(); + } + } + } + + private void assertPaxGNUEntry(TarArchiveInputStream tin, String suffix) throws Throwable { + TarArchiveEntry ae = tin.getNextTarEntry(); + assertEquals("sparsefile-" + suffix, ae.getName()); + assertTrue(ae.isGNUSparse()); + assertTrue(ae.isPaxGNUSparse()); + assertFalse(ae.isOldGNUSparse()); + assertFalse(tin.canReadEntryData(ae)); + } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java index ce78e7e..750d3d1 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarLister.java @@ -73,6 +73,9 @@ public final class TarLister { } System.out.print(ae.getLinkName()); } + if (ae.isSparse()) { + System.out.print(" (sparse)"); + } System.out.println(); } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/af2da2e1/src/test/resources/pax_gnu_sparse.tar ---------------------------------------------------------------------- diff --git a/src/test/resources/pax_gnu_sparse.tar b/src/test/resources/pax_gnu_sparse.tar new file mode 100644 index 0000000..11fb4dd Binary files /dev/null and b/src/test/resources/pax_gnu_sparse.tar differ