COMPRESS-409 remove internal buffering from TarArchiveOutputStream
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/c82dc1fc Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/c82dc1fc Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/c82dc1fc Branch: refs/heads/master Commit: c82dc1fc6238b6f22797c678f10b64c9cd36ddcb Parents: 77beca1 Author: Stefan Bodewig <bode...@apache.org> Authored: Sun Oct 8 18:54:22 2017 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Sun Oct 8 18:56:23 2017 +0200 ---------------------------------------------------------------------- src/changes/changes.xml | 4 + .../archivers/tar/TarArchiveOutputStream.java | 112 +++---------------- .../tar/TarArchiveOutputStreamTest.java | 4 + 3 files changed, 22 insertions(+), 98 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/c82dc1fc/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index b29952d..9348e63 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -123,6 +123,10 @@ wanted to create such files."> When reading tar headers with name fields containing embedded NULs, the name will now be terminated at the first NUL byte. </action> + <action issue="COMPRESS-409" type="fix" date="2017-10-08"> + Simplified TarArchiveOutputStream by replacing the internal + buffering with new class FixedLengthBlockOutputStream. + </action> </release> <release version="1.14" date="2017-05-14" description="Release 1.14"> http://git-wip-us.apache.org/repos/asf/commons-compress/blob/c82dc1fc/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java index 7cff5e3..328b5a9 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java @@ -34,6 +34,7 @@ import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.utils.CharsetNames; import org.apache.commons.compress.utils.CountingOutputStream; +import org.apache.commons.compress.utils.FixedLengthBlockOutputStream; /** * The TarOutputStream writes a UNIX tar archive as an OutputStream. Methods are provided to put @@ -83,8 +84,6 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { private String currName; private long currBytes; private final byte[] recordBuf; - private int assemLen; - private final byte[] assemBuf; private int longFileMode = LONGFILE_ERROR; private int bigNumberMode = BIGNUMBER_ERROR; private int recordsWritten; @@ -102,7 +101,8 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { */ private boolean finished = false; - private final OutputStream out; + private final FixedLengthBlockOutputStream out; + private final CountingOutputStream countingOut; private final ZipEncoding zipEncoding; @@ -203,12 +203,11 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { if (realBlockSize <=0 || realBlockSize % RECORD_SIZE != 0) { throw new IllegalArgumentException("Block size must be a multiple of 512 bytes. Attempt to use set size of " + blockSize); } - out = new CountingOutputStream(os); + out = new FixedLengthBlockOutputStream(countingOut = new CountingOutputStream(os), + RECORD_SIZE); this.encoding = encoding; this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); - this.assemLen = 0; - this.assemBuf = new byte[RECORD_SIZE]; this.recordBuf = new byte[RECORD_SIZE]; this.recordsPerBlock = realBlockSize / RECORD_SIZE; } @@ -255,7 +254,7 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { @Override public long getBytesWritten() { - return ((CountingOutputStream) out).getBytesWritten(); + return countingOut.getBytesWritten(); } /** @@ -402,32 +401,24 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { if (!haveUnclosedEntry) { throw new IOException("No current entry to close"); } - if (assemLen > 0) { - for (int i = assemLen; i < assemBuf.length; ++i) { - assemBuf[i] = 0; - } - - writeRecord(assemBuf); - - currBytes += assemLen; - assemLen = 0; - } - + out.flushBlock(); if (currBytes < currSize) { throw new IOException("entry '" + currName + "' closed at '" + currBytes + "' before the '" + currSize + "' bytes specified in the header were written"); } + recordsWritten += (currSize / RECORD_SIZE); + if (0 != currSize % RECORD_SIZE) { + recordsWritten++; + } haveUnclosedEntry = false; } /** * Writes bytes to the current tar archive entry. This method is aware of the current entry and * will throw an exception if you attempt to write bytes past the length specified for the - * current entry. The method is also (painfully) aware of the record buffering required by - * TarBuffer, and manages buffers that are not a multiple of recordsize in length, including - * assembling records from small buffers. + * current entry. * * @param wBuf The buffer to write to the archive. * @param wOffset The offset in the buffer from which to get bytes. @@ -444,63 +435,9 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { + "' bytes exceeds size in header of '" + currSize + "' bytes for entry '" + currName + "'"); - - // - // We have to deal with assembly!!! - // The programmer can be writing little 32 byte chunks for all - // we know, and we must assemble complete records for writing. - // REVIEW Maybe this should be in TarBuffer? Could that help to - // eliminate some of the buffer copying. - // - } - - if (assemLen > 0) { - if (assemLen + numToWrite >= recordBuf.length) { - final int aLen = recordBuf.length - assemLen; - - System.arraycopy(assemBuf, 0, recordBuf, 0, - assemLen); - System.arraycopy(wBuf, wOffset, recordBuf, - assemLen, aLen); - writeRecord(recordBuf); - - currBytes += recordBuf.length; - wOffset += aLen; - numToWrite -= aLen; - assemLen = 0; - } else { - System.arraycopy(wBuf, wOffset, assemBuf, assemLen, - numToWrite); - - wOffset += numToWrite; - assemLen += numToWrite; - numToWrite = 0; - } - } - - // - // When we get here we have EITHER: - // o An empty "assemble" buffer. - // o No bytes to write (numToWrite == 0) - // - while (numToWrite > 0) { - if (numToWrite < recordBuf.length) { - System.arraycopy(wBuf, wOffset, assemBuf, assemLen, - numToWrite); - - assemLen += numToWrite; - - break; - } - - writeRecord(wBuf, wOffset); - - final int num = recordBuf.length; - - currBytes += num; - numToWrite -= num; - wOffset += num; } + out.write(wBuf, wOffset, numToWrite); + currBytes += numToWrite; } /** @@ -617,27 +554,6 @@ public class TarArchiveOutputStream extends ArchiveOutputStream { recordsWritten++; } - /** - * Write an archive record to the archive, where the record may be inside of a larger array - * buffer. The buffer must be "offset plus record size" long. - * - * @param buf The buffer containing the record data to write. - * @param offset The offset of the record data within buf. - * @throws IOException on error - */ - private void writeRecord(final byte[] buf, final int offset) throws IOException { - - if (offset + RECORD_SIZE > buf.length) { - throw new IOException("record has length '" + buf.length - + "' with offset '" + offset - + "' which is less than the record size of '" - + RECORD_SIZE + "'"); - } - - out.write(buf, offset, RECORD_SIZE); - recordsWritten++; - } - private void padAsNeeded() throws IOException { final int start = recordsWritten % recordsPerBlock; if (start != 0) { http://git-wip-us.apache.org/repos/asf/commons-compress/blob/c82dc1fc/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java index e5f9fbd..3bdfe9d 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java @@ -659,6 +659,10 @@ public class TarArchiveOutputStreamTest extends AbstractTestCase { } catch (IllegalArgumentException e) { //expected } + // test with "content" that is an exact multiple of record length + contents = new byte[2048]; + java.util.Arrays.fill(contents, (byte) 42); + testPadding(TarConstants.DEFAULT_BLKSIZE, fileName, contents); } private void testPadding(int blockSize, String fileName, byte[] contents) throws IOException {