This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-codec.git
commit 6b67d6f093a821ede0b393f260b407d035289e07 Author: Alexander Pinske <a...@pinske.eu> AuthorDate: Mon May 10 21:36:22 2021 +0200 CODEC-301: Reduce byte[] allocations by reusing buffers * Reduces byte[] allocations from 280MB to <4MB when reading a 133MB base64 stream. Messured with JFR. * Keep reusing inital buffer when decoding BaseN * Attempt to fill up the user-provided buffer Previously we only filled up to a maximum of 8KB - encoding-overhead (e.g. 6KB for Base64) even if the provided buffer was bigger. * Reuse hasData method for checking pos/readPos markers --- .../apache/commons/codec/binary/BaseNCodec.java | 14 ++++++++----- .../codec/binary/BaseNCodecInputStream.java | 16 ++++++++++++--- .../codec/binary/Base64InputStreamTest.java | 24 ++++++++++++++++++++++ 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java index b054253..b22f2a4 100644 --- a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java +++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java @@ -394,7 +394,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { * @return The amount of buffered data available for reading. */ int available(final Context context) { // package protected for access from I/O streams - return context.buffer != null ? context.pos - context.readPos : 0; + return hasData(context) ? context.pos - context.readPos : 0; } /** @@ -632,7 +632,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { * @return true if there is data still available for reading. */ boolean hasData(final Context context) { // package protected for access from I/O streams - return context.buffer != null; + return context.pos > context.readPos; } /** @@ -711,12 +711,16 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { * @return The number of bytes successfully extracted into the provided byte[] array. */ int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { - if (context.buffer != null) { + if (hasData(context)) { final int len = Math.min(available(context), bAvail); System.arraycopy(context.buffer, context.readPos, b, bPos, len); context.readPos += len; - if (context.readPos >= context.pos) { - context.buffer = null; // so hasData() will return false, and this method can return -1 + if (!hasData(context)) { + // All data read. + // Reset position markers but do not set buffer to null to allow its reuse. + // hasData(context) will still return false, and this method will return 0 until + // more data is available, or -1 if EOF. + context.pos = context.readPos = 0; } return len; } diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java index f225aab..29c6259 100644 --- a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java +++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java @@ -39,12 +39,15 @@ public class BaseNCodecInputStream extends FilterInputStream { private final byte[] singleByte = new byte[1]; + private final byte[] buf; + private final Context context = new Context(); protected BaseNCodecInputStream(final InputStream input, final BaseNCodec baseNCodec, final boolean doEncode) { super(input); this.doEncode = doEncode; this.baseNCodec = baseNCodec; + this.buf = new byte[doEncode ? 4096 : 8192]; } /** @@ -169,9 +172,11 @@ public class BaseNCodecInputStream extends FilterInputStream { ----- This is a fix for CODEC-101 */ - while (readLen == 0) { + // Attempt to read the request length + while (readLen < len) { if (!baseNCodec.hasData(context)) { - final byte[] buf = new byte[doEncode ? 4096 : 8192]; + // Obtain more data. + // buf is reused across calls to read to avoid repeated allocations final int c = in.read(buf); if (doEncode) { baseNCodec.encode(buf, 0, c, context); @@ -179,7 +184,12 @@ public class BaseNCodecInputStream extends FilterInputStream { baseNCodec.decode(buf, 0, c, context); } } - readLen = baseNCodec.readResults(array, offset, len, context); + final int read = baseNCodec.readResults(array, offset + readLen, len - readLen, context); + if (read < 0) { + // Return the amount read or EOF + return readLen != 0 ? readLen : -1; + } + readLen += read; } return readLen; } diff --git a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java index 93172b9..cdbfc7c 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java +++ b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java @@ -409,6 +409,30 @@ public class Base64InputStreamTest { } /** + * Tests read using different buffer sizes + * + * @throws Exception + * for some failure scenarios. + */ + @Test + public void testReadMultipleBufferSizes() throws Exception { + final byte[][] randomData = BaseNTestData.randomData(new Base64(0, null, false), 1024 * 64); + byte[] encoded = randomData[1]; + byte[] decoded = randomData[0]; + final ByteArrayInputStream bin = new ByteArrayInputStream(encoded); + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (final Base64InputStream in = new Base64InputStream(bin)) { + for (int i : new int[] { 4 * 1024, 4 * 1024, 8 * 1024, 8 * 1024, 16 * 1024, 16 * 1024, 8 * 1024 }) { + final byte[] buf = new byte[i]; + int bytesRead = in.read(buf); + assertEquals(i, bytesRead); + out.write(buf, 0, bytesRead); + } + } + assertArrayEquals(decoded, out.toByteArray()); + } + + /** * Tests read returning 0 * * @throws Exception