Fix reading of multibyte name entries. This fixes COMPRESS-459 by using the name's size in bytes, as read from the header field in the stream, instead of relying on the assumption that each character is exactly one byte, which isn't true for UTF-8, UTF-16 or other multi-byte character encodings.
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/9e80104b Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/9e80104b Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/9e80104b Branch: refs/heads/master Commit: 9e80104befc54daaa097870b857e1bc334521490 Parents: f5330f7 Author: Jens Reimann <jreim...@redhat.com> Authored: Mon Jul 9 11:41:43 2018 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Wed Jul 11 18:05:49 2018 +0200 ---------------------------------------------------------------------- .../compress/archivers/cpio/CpioArchiveEntry.java | 17 ++++++++++++++++- .../archivers/cpio/CpioArchiveInputStream.java | 4 ++-- .../cpio/CpioArchiveInputStreamTest.java | 16 ++++++++++++++++ src/test/resources/COMPRESS-459.cpio | Bin 0 -> 512 bytes 4 files changed, 34 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/9e80104b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java index 28e5823..3ad7c87 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveEntry.java @@ -469,10 +469,25 @@ public class CpioArchiveEntry implements CpioConstants, ArchiveEntry { * @return the number of bytes needed to pad the header (0,1,2,3) */ public int getHeaderPadCount(){ + long namesize = name != null ? name.length() : 0; + return getHeaderPadCount(namesize); + } + + /** + * Get the number of bytes needed to pad the header to the alignment boundary. 
+ * + * @param namesize + * The length of the name in bytes, as read in the stream. + * Without the trailing zero byte. + * @return the number of bytes needed to pad the header (0,1,2,3) + * + * @since 1.18 + */ + public int getHeaderPadCount(long namesize){ if (this.alignmentBoundary == 0) { return 0; } int size = this.headerSize + 1; // Name has terminating null if (name != null) { - size += name.length(); + size += namesize; } final int remain = size % this.alignmentBoundary; if (remain > 0){ http://git-wip-us.apache.org/repos/asf/commons-compress/blob/9e80104b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java index ad8e125..b64d091 100644 --- a/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStream.java @@ -393,7 +393,7 @@ public class CpioArchiveInputStream extends ArchiveInputStream implements + ArchiveUtils.sanitize(name) + " Occured at byte: " + getBytesRead()); } - skip(ret.getHeaderPadCount()); + skip(ret.getHeaderPadCount(namesize-1)); return ret; } @@ -449,7 +449,7 @@ public class CpioArchiveInputStream extends ArchiveInputStream implements + ArchiveUtils.sanitize(name) + "Occured at byte: " + getBytesRead()); } - skip(ret.getHeaderPadCount()); + skip(ret.getHeaderPadCount(namesize-1)); return ret; } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/9e80104b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java 
b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java index f174405..762d464 100644 --- a/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/cpio/CpioArchiveInputStreamTest.java @@ -65,4 +65,20 @@ public class CpioArchiveInputStreamTest extends AbstractTestCase { assertEquals(count, 1); } + + @Test + public void testCpioUnarchiveMultibyteCharName() throws Exception { + final CpioArchiveInputStream in = + new CpioArchiveInputStream(new FileInputStream(getFile("COMPRESS-459.cpio")), "UTF-8"); + CpioArchiveEntry entry= null; + + int count = 0; + while ((entry = (CpioArchiveEntry) in.getNextEntry()) != null) { + count++; + assertNotNull(entry); + } + in.close(); + + assertEquals(2, count); + } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/9e80104b/src/test/resources/COMPRESS-459.cpio ---------------------------------------------------------------------- diff --git a/src/test/resources/COMPRESS-459.cpio b/src/test/resources/COMPRESS-459.cpio new file mode 100644 index 0000000..8ae1662 Binary files /dev/null and b/src/test/resources/COMPRESS-459.cpio differ