Author: ggregory Date: Mon Feb 4 12:20:41 2008 New Revision: 618419 URL: http://svn.apache.org/viewvc?rev=618419&view=rev Log: [CODEC-61] Base64.EncodeBase64() throws NegativeArraySizeException on large files. Throws IllegalArgumentException when the input array needs an output array bigger than Integer.MAX_VALUE.
Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java?rev=618419&r1=618418&r2=618419&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java Mon Feb 4 12:20:41 2008 @@ -13,7 +13,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ + */ package org.apache.commons.codec.binary; @@ -25,10 +25,11 @@ /** * Provides Base64 encoding and decoding as defined by RFC 2045. * - * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> - * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One: - * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p> - * + * <p> + * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose + * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. + * </p> + * * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> * @author Apache Software Foundation * @since 1.0-dev @@ -39,8 +40,10 @@ /** * Chunk size per RFC 2045 section 6.8. * - * <p>The {@value} character limit does not count the trailing CRLF, but counts - * all other characters, including any equal signs.</p> + * <p> + * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any + * equal signs. 
+ * </p> * * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> */ @@ -87,7 +90,7 @@ * Used to test the sign of a byte. */ private static final int SIGN = -128; - + /** * Byte used to pad output. */ @@ -104,7 +107,7 @@ * </p> */ private static final byte[] base64Alphabet = new byte[BASELENGTH]; - + /** * <p> * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through @@ -157,7 +160,8 @@ /** * Returns whether or not the <code>octect</code> is in the base 64 alphabet. * - * @param octect The value to test + * @param octect + * The value to test * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise. */ private static boolean isBase64(byte octect) { @@ -171,12 +175,12 @@ } /** - * Tests a given byte array to see if it contains - * only valid characters within the Base64 alphabet. - * - * @param arrayOctect byte array to test - * @return <code>true</code> if all bytes are valid characters in the Base64 - * alphabet or if the byte array is empty; false, otherwise + * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. + * + * @param arrayOctect + * byte array to test + * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is + * empty; false, otherwise */ public static boolean isArrayByteBase64(byte[] arrayOctect) { @@ -197,10 +201,10 @@ } /** - * Encodes binary data using the base64 algorithm but - * does not chunk the output. - * - * @param binaryData binary data to encode + * Encodes binary data using the base64 algorithm but does not chunk the output. 
+ * + * @param binaryData + * binary data to encode * @return Base64 characters */ public static byte[] encodeBase64(byte[] binaryData) { @@ -208,28 +212,25 @@ } /** - * Encodes binary data using the base64 algorithm and chunks - * the encoded output into 76 character blocks - * - * @param binaryData binary data to encode + * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks + * + * @param binaryData + * binary data to encode * @return Base64 characters chunked in 76 character blocks */ public static byte[] encodeBase64Chunked(byte[] binaryData) { return encodeBase64(binaryData, true); } - /** - * Decodes an Object using the base64 algorithm. This method - * is provided in order to satisfy the requirements of the - * Decoder interface, and will throw a DecoderException if the - * supplied object is not of type byte[]. - * - * @param pObject Object to decode - * @return An object (of type byte[]) containing the - * binary data which corresponds to the byte[] supplied. - * @throws DecoderException if the parameter supplied is not - * of type byte[] + * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the + * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[]. + * + * @param pObject + * Object to decode + * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] supplied. + * @throws DecoderException + * if the parameter supplied is not of type byte[] */ public Object decode(Object pObject) throws DecoderException { if (!(pObject instanceof byte[])) { @@ -239,10 +240,10 @@ } /** - * Decodes a byte[] containing containing - * characters in the Base64 alphabet. - * - * @param pArray A byte array containing Base64 character data + * Decodes a byte[] containing containing characters in the Base64 alphabet. 
+ * + * @param pArray + * A byte array containing Base64 character data * @return a byte array containing binary data */ public byte[] decode(byte[] pArray) { @@ -250,41 +251,48 @@ } /** - * Encodes binary data using the base64 algorithm, optionally - * chunking the output into 76 character blocks. - * - * @param binaryData Array containing binary data to encode. - * @param isChunked if <code>true</code> this encoder will chunk - * the base64 output into 76 character blocks + * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. + * + * @param binaryData + * Array containing binary data to encode. + * @param isChunked + * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks * @return Base64-encoded data. + * @throws IllegalArgumentException + * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} */ public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) { - int lengthDataBits = binaryData.length * EIGHTBIT; - int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP; - int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP; - byte encodedData[] = null; - int encodedDataLength = 0; - int nbrChunks = 0; + long binaryDataLength = binaryData.length; + long lengthDataBits = binaryDataLength * EIGHTBIT; + long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP; + long tripletCount = lengthDataBits / TWENTYFOURBITGROUP; + long encodedDataLengthLong = 0; + int chunckCount = 0; if (fewerThan24bits != 0) { - //data not divisible by 24 bit - encodedDataLength = (numberTriplets + 1) * 4; + // data not divisible by 24 bit + encodedDataLengthLong = (tripletCount + 1) * 4; } else { // 16 or 8 bit - encodedDataLength = numberTriplets * 4; + encodedDataLengthLong = tripletCount * 4; } - // If the output is to be "chunked" into 76 character sections, - // for compliance with RFC 2045 MIME, then it is important to + // If the 
output is to be "chunked" into 76 character sections, + // for compliance with RFC 2045 MIME, then it is important to // allow for extra length to account for the separator(s) if (isChunked) { - nbrChunks = - (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE)); - encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length; + chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math + .ceil((float) encodedDataLengthLong / CHUNK_SIZE)); + encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length; } - encodedData = new byte[encodedDataLength]; + if (encodedDataLengthLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException( + "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE); + } + int encodedDataLength = (int) encodedDataLengthLong; + byte encodedData[] = new byte[encodedDataLength]; byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0; @@ -294,33 +302,28 @@ int nextSeparatorIndex = CHUNK_SIZE; int chunksSoFar = 0; - //log.debug("number of triplets = " + numberTriplets); - for (i = 0; i < numberTriplets; i++) { + // log.debug("number of triplets = " + numberTriplets); + for (i = 0; i < tripletCount; i++) { dataIndex = i * 3; b1 = binaryData[dataIndex]; b2 = binaryData[dataIndex + 1]; b3 = binaryData[dataIndex + 2]; - //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3); + // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3); l = (byte) (b2 & 0x0f); k = (byte) (b1 & 0x03); - byte val1 = - ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); - byte val2 = - ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); - byte val3 = - ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc); + byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); + byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); + byte val3 = ((b3 & SIGN) == 0) ? 
(byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc); encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; - //log.debug( "val2 = " + val2 ); - //log.debug( "k4 = " + (k<<4) ); - //log.debug( "vak = " + (val2 | (k<<4)) ); - encodedData[encodedIndex + 1] = - lookUpBase64Alphabet[val2 | (k << 4)]; - encodedData[encodedIndex + 2] = - lookUpBase64Alphabet[(l << 2) | val3]; + // log.debug( "val2 = " + val2 ); + // log.debug( "k4 = " + (k<<4) ); + // log.debug( "vak = " + (val2 | (k<<4)) ); + encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)]; + encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3]; encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f]; encodedIndex += 4; @@ -329,16 +332,9 @@ if (isChunked) { // this assumes that CHUNK_SIZE % 4 == 0 if (encodedIndex == nextSeparatorIndex) { - System.arraycopy( - CHUNK_SEPARATOR, - 0, - encodedData, - encodedIndex, - CHUNK_SEPARATOR.length); + System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length); chunksSoFar++; - nextSeparatorIndex = - (CHUNK_SIZE * (chunksSoFar + 1)) + - (chunksSoFar * CHUNK_SEPARATOR.length); + nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length); encodedIndex += CHUNK_SEPARATOR.length; } } @@ -350,10 +346,9 @@ if (fewerThan24bits == EIGHTBIT) { b1 = binaryData[dataIndex]; k = (byte) (b1 & 0x03); - //log.debug("b1=" + b1); - //log.debug("b1<<2 = " + (b1>>2) ); - byte val1 = - ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); + // log.debug("b1=" + b1); + // log.debug("b1<<2 = " + (b1>>2) ); + byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4]; encodedData[encodedIndex + 2] = PAD; @@ -365,27 +360,20 @@ l = (byte) (b2 & 0x0f); k = (byte) (b1 & 0x03); - byte val1 = - ((b1 & SIGN) == 0) ? 
(byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); - byte val2 = - ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); + byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); + byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; - encodedData[encodedIndex + 1] = - lookUpBase64Alphabet[val2 | (k << 4)]; + encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)]; encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2]; encodedData[encodedIndex + 3] = PAD; } if (isChunked) { // we also add a separator to the end of the final chunk. - if (chunksSoFar < nbrChunks) { - System.arraycopy( - CHUNK_SEPARATOR, - 0, - encodedData, - encodedDataLength - CHUNK_SEPARATOR.length, - CHUNK_SEPARATOR.length); + if (chunksSoFar < chunckCount) { + System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length, + CHUNK_SEPARATOR.length); } } @@ -394,8 +382,9 @@ /** * Decodes Base64 data into octects - * - * @param base64Data Byte array containing Base64 data + * + * @param base64Data + * Byte array containing Base64 data * @return Array containing decoded data. 
*/ public static byte[] decodeBase64(byte[] base64Data) { @@ -426,59 +415,57 @@ } decodedData = new byte[lastData - numberQuadruple]; } - + for (int i = 0; i < numberQuadruple; i++) { dataIndex = i * 4; marker0 = base64Data[dataIndex + 2]; marker1 = base64Data[dataIndex + 3]; - + b1 = base64Alphabet[base64Data[dataIndex]]; b2 = base64Alphabet[base64Data[dataIndex + 1]]; - + if (marker0 != PAD && marker1 != PAD) { - //No PAD e.g 3cQl + // No PAD e.g 3cQl b3 = base64Alphabet[marker0]; b4 = base64Alphabet[marker1]; - + decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); - decodedData[encodedIndex + 1] = - (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); + decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4); } else if (marker0 == PAD) { - //Two PAD e.g. 3c[Pad][Pad] + // Two PAD e.g. 3c[Pad][Pad] decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); } else if (marker1 == PAD) { - //One PAD e.g. 3cQ[Pad] + // One PAD e.g. 3cQ[Pad] b3 = base64Alphabet[marker0]; - + decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); - decodedData[encodedIndex + 1] = - (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); + decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); } encodedIndex += 3; } return decodedData; } - + /** * Discards any whitespace from a base-64 encoded block. - * - * @param data The base-64 encoded data to discard the whitespace - * from. + * + * @param data + * The base-64 encoded data to discard the whitespace from. * @return The data, less whitespace (see RFC 2045). 
*/ static byte[] discardWhitespace(byte[] data) { byte groomedData[] = new byte[data.length]; int bytesCopied = 0; - + for (int i = 0; i < data.length; i++) { switch (data[i]) { - case (byte) ' ' : - case (byte) '\n' : - case (byte) '\r' : - case (byte) '\t' : + case (byte) ' ' : + case (byte) '\n' : + case (byte) '\r' : + case (byte) '\t' : break; - default: + default : groomedData[bytesCopied++] = data[i]; } } @@ -491,12 +478,11 @@ } /** - * Discards any characters outside of the base64 alphabet, per - * the requirements on page 25 of RFC 2045 - "Any characters - * outside of the base64 alphabet are to be ignored in base64 - * encoded data." - * - * @param data The base-64 encoded data to groom + * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any + * characters outside of the base64 alphabet are to be ignored in base64 encoded data." + * + * @param data + * The base-64 encoded data to groom * @return The data, less non-base64 characters (see RFC 2045). */ static byte[] discardNonBase64(byte[] data) { @@ -516,34 +502,30 @@ return packedData; } - // Implementation of the Encoder Interface /** - * Encodes an Object using the base64 algorithm. This method - * is provided in order to satisfy the requirements of the - * Encoder interface, and will throw an EncoderException if the - * supplied object is not of type byte[]. - * - * @param pObject Object to encode - * @return An object (of type byte[]) containing the - * base64 encoded data which corresponds to the byte[] supplied. - * @throws EncoderException if the parameter supplied is not - * of type byte[] + * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the + * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 
+ * + * @param pObject + * Object to encode + * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied. + * @throws EncoderException + * if the parameter supplied is not of type byte[] */ public Object encode(Object pObject) throws EncoderException { if (!(pObject instanceof byte[])) { - throw new EncoderException( - "Parameter supplied to Base64 encode is not a byte[]"); + throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]"); } return encode((byte[]) pObject); } /** - * Encodes a byte[] containing binary data, into a byte[] containing - * characters in the Base64 alphabet. - * - * @param pArray a byte array containing binary data + * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet. + * + * @param pArray + * a byte array containing binary data * @return A byte array containing only Base64 character data */ public byte[] encode(byte[] pArray) {