svn commit: r1447443 - in /commons/proper/codec/trunk/src: changes/ main/java/org/apache/commons/codec/binary/

julius Mon, 18 Feb 2013 11:14:55 -0800

Author: julius
Date: Mon Feb 18 19:14:31 2013
New Revision: 1447443

URL: http://svn.apache.org/r1447443
Log:
CODEC-166 - Base64 could be faster.


Added:
    
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
    
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
Modified:
    commons/proper/codec/trunk/src/changes/changes.xml
    
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java

Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1447443&r1=1447442&r2=1447443&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Mon Feb 18 19:14:31 2013
@@ -48,6 +48,7 @@ The <action> type attribute can be add,u
     </release>
     -->
     <release version="1.8" date="TBA" description="Feature and fix release.">
+      <action dev="julius" type="fix" issue="CODEC-166">Base64 could be 
faster.</action>
       <action dev="julius" type="update" issue="CODEC-167">Adds JUnit to test 
our decode with pad character in the middle.</action> 
       <action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add 
Match Rating Approach (MRA) phonetic algorithm encoder.</action>   
       <action dev="ggregory" type="fix" issue="CODEC-163" 
due-to="leo141">ColognePhonetic encoder unneccessarily creates many char arrays 
on every loop run.</action>   

Added: 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java?rev=1447443&view=auto
==============================================================================
--- 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
 (added)
+++ 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
 Mon Feb 18 19:14:31 2013
@@ -0,0 +1,550 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * NOTE ABOUT PROVENANCE:
+ * ----------------------
+ * This source file is called ApacheModifiedMiGBase64.java.
+ * We took the BSD-licensed MiGBase64.java file from SourceForge
+ * on January 28th, 2013 (http://migbase64.sourceforge.net/), and
+ * modified it to make it suitable for inclusion inside Apache
+ * Commons-Codec.
+ *
+ * The original file is licensed according to the BSD 2-clause
+ * license (see below, after the section titled "Licence (BSD)").
+ * You should also be able to obtain the original file as
+ * "MiGBase64.original" within the same source directory as this file.
+ *
+ */
+
+package org.apache.commons.codec.binary;
+
+import java.util.Arrays;
+
+/**
+ * <pre>====================================================</pre>
+ * Modified by Apache Software Foundation on February 18th, 2013, in the 
following ways:
+ * <p/>
+ * - Set all methods to "package" level visibility, since this is strictly
+ * meant to be back-end for our non-streaming Base64 implementation.
+ * (Streaming Base64 still uses our original implementation).
+ * <p/>
+ * - Added support for the Apache Commons Codec variations to make all the 
Commons-Codec
+ * unit tests pass:
+ * <ol>
+ * <li> Ability to alter line-length from default of 76</li>
+ * <li> If we are using line-separators, must always end with a 
line-separator, no matter
+ * length of final line.</li>
+ * <li> Make '=' and '==' padding optional when decoding.</li>
+ * <li> Make decoding of Base64 with inner padding (e.g., AA==AA==) consistent 
with Commons-Codec.</li>
+ * <li> Add support for URL-Safe Base64 alphabet (which, incidentally, omits 
'=' and '==' padding).</li>
+ * </ol>
+ * <p/>
+ * - And thus Apache Commons-Codec is now as fast as MiGBase64, since it uses 
MiGBase64 under the
+ * hood.  Yay!  (Non-streaming encode speed-up is around 200%).
+ * <p/>
+ * And now, back to your regular scheduled programming:
+ * <pre>====================================================</pre>
+ * <p/>
+ * A very fast and memory efficient class to encode and decode to and from 
BASE64 in full accordance
+ * with RFC 2045.<br><br>
+ * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is 
about 10 times faster
+ * on small arrays (10 - 1000 bytes) and 2-3 times as fast on larger arrays 
(10000 - 1000000 bytes)
+ * compared to <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
+ * <p/>
+ * On byte arrays the encoder is about 20% faster than Jakarta Commons Codec 
for encode and
+ * about 50% faster for decoding large arrays. This implementation is about 
twice as fast on very small
+ * arrays (&lt; 30 bytes). If source/destination is a <code>String</code> this
+ * version is about three times as fast due to the fact that the Commons Codec 
result has to be recoded
+ * to a <code>String</code> from <code>byte[]</code>, which is very 
expensive.<br><br>
+ * <p/>
+ * This encode/decode algorithm doesn't create any temporary arrays as many 
other codecs do, it only
+ * allocates the resulting array. This produces less garbage and it is 
possible to handle arrays twice
+ * as large as algorithms that create a temporary array. (E.g. Jakarta Commons 
Codec). It is unknown
+ * whether Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary 
arrays but since performance
+ * is quite low it probably does.<br><br>
+ * <p/>
+ * The encoder produces the same output as the Sun one except that the Sun's 
encoder appends
+ * a trailing line separator if the last character isn't a pad. Unclear why 
but it only adds to the
+ * length and is probably a side effect. Both are in conformance with RFC 2045 
though.<br>
+ * Commons codec seems to always add a trailing line separator.<br><br>
+ * <p/>
+ * <b>Note!</b>
+ * The encode/decode method pairs (types) come in three versions with the 
<b>exact</b> same algorithm and
+ * thus a lot of code redundancy. This is to not create any temporary arrays 
for transcoding to/from different
+ * format types. The methods not used can simply be commented out.<br><br>
+ * <p/>
+ * <S>There is also a "fast" version of all decode methods that works the same 
way as the normal ones, but
+ * has a few demands on the decoded input. Normally though, these fast versions 
should be used if the source of
+ * the input is known and it hasn't been tampered with.</S> (- removed for 
ApacheModifiedMiGBase64). <br><br>
+ * <p/>
+ * If you find the code useful or you find a bug, please send me a note at 
base64 @ miginfocom . com.
+ * <p/>
+ * Licence (BSD):
+ * ==============
+ * <p/>
+ * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
+ * All rights reserved.
+ * <p/>
+ * Redistribution and use in source and binary forms, with or without 
modification,
+ * are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this 
list
+ * of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, 
this
+ * list of conditions and the following disclaimer in the documentation and/or 
other
+ * materials provided with the distribution.
+ * Neither the name of the MiG InfoCom AB nor the names of its contributors 
may be
+ * used to endorse or promote products derived from this software without 
specific
+ * prior written permission.
+ * <p/>
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * @author Mikael Grev
+ *         Date: 2004-aug-02
+ *         Time: 11:31:11
+ * @version 2.2
+ */
+
+public final class ApacheModifiedMiGBase64 {
+    // Marked the class public so that it shows up in javadoc generation.  All 
methods are static "package" level.
+
+    private final static byte[] CRLF = {'\r', '\n'};
+
+    private static final char[] CA =
+            
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
+
+    private static final char[] CA_URL_SAFE =
+            
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();
+
+    private static final int[] IA = new int[256];
+
+    /**
+     * Private constructor to prevent instantiation.
+     */
+    private ApacheModifiedMiGBase64() {}
+
+    static {
+        Arrays.fill(IA, -1);
+        for (int i = 0, iS = CA.length; i < iS; i++) {
+            IA[CA[i]] = i;
+
+            // Store the URL_SAFE values in the same IA array.
+            // This way we can auto-decode URL-SAFE or standard alphabet, 
without
+            // consumer needing to specify decode alphabet ahead of time:
+            IA[CA_URL_SAFE[i]] = i;
+        }
+        IA['='] = 0;
+    }
+
+    /**
+     * Decodes a BASE64 encoded char array. All illegal characters will be 
ignored and can handle both arrays with
+     * and without line separators.
+     *
+     * @param sArr The source array.
+     * @return The decoded array of bytes. May be of length 0.
+     */
+    static byte[] decode(final char[] sArr) {
+        final int sLen = sArr != null ? sArr.length : 0;
+        if (sLen == 0) {
+            return new byte[0];
+        }
+
+        // Find earliest pad character so that we can decode things like 
"AA==AA==" consistently.
+        int padPos = sLen;
+        int padCount = 0;
+        for (int i = sLen - 1; i >= 0; i--) {
+            if (sArr[i] == '=') {
+                padPos = i;
+                padCount = 1;
+                if (i + 1 < sLen && sArr[i + 1] == '=') {
+                    padCount = 2;
+                }
+            }
+        }
+
+        // Count illegal characters (including '\r', '\n') to know what size 
the returned array will be,
+        // so we don't have to reallocate & copy it later.
+        int sepCnt = 0; // Number of separator characters. (Actually illegal 
characters, but that's a bonus...)
+        for (int i = 0; i < padPos; i++) {
+            if (IA[sArr[i]] < 0) {
+                sepCnt++;
+            }
+        }
+
+        final int len = ((padPos + padCount - sepCnt) * 6 >> 3) - padCount;
+        if (len <= 0) {
+            return new byte[0];
+        }
+        final byte[] dArr = new byte[len];       // Preallocate byte[] of 
exact length
+        int d = 0;
+        int i = 0;
+        try {
+            for (int s = 0; d < len; ) {
+                // Assemble three bytes into an int from four "valid" 
characters.
+                i = 0;
+                for (int j = 0; j < 4; j++) {   // j only increased if a valid 
char was found.
+                    int c = IA[sArr[s++]];
+                    if (c >= 0) {
+                        i |= c << (18 - j * 6);
+                    } else {
+                        j--;
+                    }
+                }
+                // Add the bytes
+                dArr[d++] = (byte) (i >> 16);
+                if (d < len) {
+                    dArr[d++] = (byte) (i >> 8);
+                    if (d < len) {
+                        dArr[d++] = (byte) i;
+                    }
+                }
+            }
+        } catch (ArrayIndexOutOfBoundsException aioobe) {
+            // Handle url-safe input (with no padding).
+            dArr[d++] = (byte) (i >> 16);
+            if (d < len) {
+                dArr[d++] = (byte) (i >> 8);
+                if (d < len) {
+                    dArr[d] = (byte) i;
+                }
+            }
+        }
+        return dArr;
+    }
+
+    /**
+     * Decodes a BASE64 encoded byte array. All illegal characters will be 
ignored and can handle both arrays with
+     * and without line separators.
+     *
+     * @param sArr The source array.
+     * @return The decoded array of bytes. May be of length 0.
+     */
+    static byte[] decode(final byte[] sArr) {
+        final int sLen = sArr != null ? sArr.length : 0;
+        if (sLen == 0) {
+            return new byte[0];
+        }
+
+        // Find earliest pad character so that we can decode things like 
"AA==AA==" consistently.
+        int padPos = sLen;
+        int padCount = 0;
+        for (int i = sLen - 1; i >= 0; i--) {
+            if (sArr[i] == '=') {
+                padPos = i;
+                padCount = 1;
+                if (i + 1 < sLen && sArr[i + 1] == '=') {
+                    padCount = 2;
+                }
+            }
+        }
+
+        // Count illegal characters (including '\r', '\n') to know what size 
the returned array will be,
+        // so we don't have to reallocate & copy it later.
+        int sepCnt = 0; // Number of separator characters. (Actually illegal 
characters, but that's a bonus...)
+        for (int i = 0; i < padPos; i++) {
+            if (IA[sArr[i] & 0xff] < 0) {
+                sepCnt++;
+            }
+        }
+
+        final int len = ((padPos + padCount - sepCnt) * 6 >> 3) - padCount;
+        if (len <= 0) {
+            return new byte[0];
+        }
+        final byte[] dArr = new byte[len];       // Preallocate byte[] of 
exact length
+        int d = 0;
+        int i = 0;
+        try {
+            for (int s = 0; d < len; ) {
+                // Assemble three bytes into an int from four "valid" 
characters.
+                i = 0;
+                for (int j = 0; j < 4; j++) {   // j only increased if a valid 
char was found.
+                    int c = IA[sArr[s++] & 0xff];
+                    if (c >= 0) {
+                        i |= c << (18 - j * 6);
+                    } else {
+                        j--;
+                    }
+                }
+
+                // Add the bytes
+                dArr[d++] = (byte) (i >> 16);
+                if (d < len) {
+                    dArr[d++] = (byte) (i >> 8);
+                    if (d < len) {
+                        dArr[d++] = (byte) i;
+                    }
+                }
+            }
+        } catch (ArrayIndexOutOfBoundsException aioobe) {
+            // Handle url-safe input (with no padding).
+            dArr[d++] = (byte) (i >> 16);
+            if (d < len) {
+                dArr[d++] = (byte) (i >> 8);
+                if (d < len) {
+                    dArr[d] = (byte) i;
+                }
+            }
+        }
+        return dArr;
+    }
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>byte[]</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr          The bytes to convert.
+     * @param lineSep       If true, end every line of at most 76 characters, 
including the last one, with "\r\n".<br>
+     *                      No line separator will be in breach of RFC 2045 
which specifies max 76 per line but will be a
+     *                      little faster.
+     * @param urlSafe       If true, use the URL_SAFE base64 alphabet (-_) 
instead of the standard alphabet (+/).
+     * @param maxResultSize Largest size of result we are willing to encode 
(typically Integer.MAX_VALUE).
+     * @return A BASE64 encoded array.
+     */
+    static byte[] encodeToByte(byte[] sArr, final boolean lineSep, boolean 
urlSafe, int maxResultSize) {
+        return encodeToByte(sArr, lineSep, urlSafe, maxResultSize, CRLF, 76);
+    }
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>byte[]</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr          The bytes to convert.
+     * @param lineSep       If true, end every line of at most lineLen 
characters, including the last one, with lineSeparator.<br>
+     *                      No line separator will be in breach of RFC 2045 
which specifies max 76 per line but will be a
+     *                      little faster.
+     * @param urlSafe       If true, use the URL_SAFE base64 alphabet (-_) 
instead of the standard alphabet (+/).
+     * @param maxResultSize Largest size of result we are willing to encode 
(typically Integer.MAX_VALUE).
+     * @param lineSeparator Sequence of bytes to use as the line separator 
(typically {'\r','\n'}).  Ignored
+     *                      if <code>lineSep</code> is set to false.
+     * @param lineLen       Number of characters to write out per line before 
writing the lineSeparator
+     *                      sequence.  Ignored if <code>lineSep</code> is set 
to false.
+     * @return A BASE64 encoded array.
+     */
+    static byte[] encodeToByte(
+            final byte[] sArr, final boolean lineSep, final boolean urlSafe, 
final int maxResultSize,
+            final byte[] lineSeparator, final int lineLen
+    ) {
+        if (sArr == null || sArr.length == 0) { return sArr; }
+
+        final int sLen = sArr.length;
+        final int eLen = (sLen / 3) * 3;                        // Length of 
even 24-bits.
+        final int left = sLen - eLen;                           // A value 
between 0 and 2.
+        final int cCnt = ((sLen - 1) / 3 + 1) << 2;             // Returned 
character count
+        int dLen = cCnt + (lineSep ? (cCnt - 1) / lineLen * 
lineSeparator.length : 0); // Length of returned array
+
+        // org.apache.commons.codec.binary.Base64 always ends with CRLF in 
chunking mode.
+        if (lineSep) {
+            dLen += lineSeparator.length;
+        }
+
+        final char[] ENCODE_ARRAY = urlSafe ? 
ApacheModifiedMiGBase64.CA_URL_SAFE : ApacheModifiedMiGBase64.CA;
+        if (urlSafe && left > 0) {
+            dLen--;
+            if (left != 2) {
+                dLen--;
+            }
+        }
+        checkLen(dLen, maxResultSize);
+        final byte[] dArr = new byte[dLen];
+
+        // Encode even 24-bits
+        int charCount = 0;
+        for (int s = 0, d = 0; s < eLen; ) {
+            // Copy next three bytes into lower 24 bits of int, paying 
attention to sign.
+            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | 
(sArr[s++] & 0xff);
+
+            // Encode the int into four chars
+            dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 18) & 0x3f];
+            dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 12) & 0x3f];
+            dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+            dArr[d++] = (byte) ENCODE_ARRAY[i & 0x3f];
+            charCount += 4;
+
+            // Add optional line separator
+            if (lineSep && charCount % lineLen <= 3 && d < dLen - 
lineSeparator.length) {
+                System.arraycopy(lineSeparator, 0, dArr, d, 
lineSeparator.length);
+                d += lineSeparator.length;
+            }
+        }
+
+        // Make space for our final CRLF.
+        if (lineSep) {
+            dLen -= lineSeparator.length;
+        }
+
+        // Pad and encode last bits if source isn't an even 24 bits.
+        if (left > 0) {
+            // Prepare the int
+            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 
1] & 0xff) << 2) : 0);
+
+            // Set last four chars
+            // (url-safe omits the '=' padding).
+            if (urlSafe && left == 2) {
+                dArr[dLen - 3] = (byte) ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 2] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+                dArr[dLen - 1] = (byte) ENCODE_ARRAY[i & 0x3f];
+            } else if (urlSafe) {
+                dArr[dLen - 2] = (byte) ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 1] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+            } else {
+                dArr[dLen - 4] = (byte) ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 3] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+                dArr[dLen - 2] = (byte) (left == 2 ? ENCODE_ARRAY[i & 0x3f] : 
'=');
+                dArr[dLen - 1] = '=';
+            }
+        }
+
+        // And now we append our final CRLF if necessary.
+        if (lineSep) {
+            dLen += lineSeparator.length;
+            System.arraycopy(lineSeparator, 0, dArr, dLen - 
lineSeparator.length, lineSeparator.length);
+        }
+        return dArr;
+    }
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>char[]</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr          The bytes to convert.
+     * @param lineSep       If true, end every line of at most 76 characters, 
including the last one, with "\r\n".<br>
+     *                      No line separator will be in breach of RFC 2045 
which specifies max 76 per line but will be a
+     *                      little faster.
+     * @param urlSafe       If true, use the URL_SAFE base64 alphabet (-_) 
instead of the standard alphabet (+/).
+     * @param maxResultSize Largest size of result we are willing to encode 
(typically Integer.MAX_VALUE).
+     * @return A BASE64 encoded array.
+     */
+    static char[] encodeToChar(
+            final byte[] sArr, final boolean lineSep, final boolean urlSafe, 
final int maxResultSize
+    ) {
+        if (sArr == null) { return null; }
+        if (sArr.length == 0) { return new char[0]; }
+
+        final int sLen = sArr.length;
+        final int eLen = (sLen / 3) * 3;                        // Length of 
even 24-bits.
+        final int left = sLen - eLen;                           // A value 
between 0 and 2.
+        final int cCnt = ((sLen - 1) / 3 + 1) << 2;             // Returned 
character count
+        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of 
returned array
+
+        // org.apache.commons.codec.binary.Base64 always ends with CRLF in 
chunking mode.
+        if (lineSep) {
+            dLen += 2;
+        }
+
+        final char[] ENCODE_ARRAY = urlSafe ? 
ApacheModifiedMiGBase64.CA_URL_SAFE : ApacheModifiedMiGBase64.CA;
+        if (urlSafe && left > 0) {
+            dLen--;
+            if (left != 2) {
+                dLen--;
+            }
+        }
+        checkLen(dLen, maxResultSize);
+        final char[] dArr = new char[dLen];
+
+        // Encode even 24-bits
+        for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+            // Copy next three bytes into lower 24 bits of int, paying 
attention to sign.
+            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | 
(sArr[s++] & 0xff);
+
+            // Encode the int into four chars
+            dArr[d++] = ENCODE_ARRAY[(i >>> 18) & 0x3f];
+            dArr[d++] = ENCODE_ARRAY[(i >>> 12) & 0x3f];
+            dArr[d++] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+            dArr[d++] = ENCODE_ARRAY[i & 0x3f];
+
+            // Add optional line separator
+            if (lineSep && ++cc == 19 && d < dLen - 2) {
+                dArr[d++] = '\r';
+                dArr[d++] = '\n';
+                cc = 0;
+            }
+        }
+
+        // Make space for our final CRLF.
+        if (lineSep) {
+            dLen -= 2;
+        }
+
+        // Pad and encode last bits if source isn't even 24 bits.
+        if (left > 0) {
+            // Prepare the int
+            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 
1] & 0xff) << 2) : 0);
+
+            // Set last four chars
+            // (url-safe omits the '=' padding).
+            if (urlSafe && left == 2) {
+                dArr[dLen - 3] = ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 2] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+                dArr[dLen - 1] = ENCODE_ARRAY[i & 0x3f];
+            } else if (urlSafe) {
+                dArr[dLen - 2] = ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 1] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+            } else {
+                dArr[dLen - 4] = ENCODE_ARRAY[i >> 12];
+                dArr[dLen - 3] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+                dArr[dLen - 2] = left == 2 ? ENCODE_ARRAY[i & 0x3f] : '=';
+                dArr[dLen - 1] = '=';
+            }
+        }
+
+        // And now we append our final CRLF if necessary.
+        if (lineSep) {
+            dLen += 2;
+            dArr[dLen - 2] = '\r';
+            dArr[dLen - 1] = '\n';
+        }
+        return dArr;
+    }
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>String</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr          The bytes to convert.
+     * @param lineSep       If true, end every line of at most 76 characters, 
including the last one, with "\r\n".<br>
+     *                      No line separator will be in breach of RFC 2045 
which specifies max 76 per line but will be a
+     *                      little faster.
+     * @param urlSafe       If true, use the URL_SAFE base64 alphabet (-_) 
instead of the standard alphabet (+/).
+     * @param maxResultSize Largest size of result we are willing to encode 
(typically Integer.MAX_VALUE).
+     * @return A BASE64 encoded array.
+     */
+    static String encodeToString(
+            byte[] sArr, boolean lineSep, boolean urlSafe, int maxResultSize
+    ) {
+        if (sArr == null) { return null; }
+        if (sArr.length == 0) { return ""; }
+
+        // Reuse char[] since we can't create a String incrementally anyway 
and StringBuffer/Builder would be slower.
+        return new String(encodeToChar(sArr, lineSep, urlSafe, maxResultSize));
+    }
+
+
+    private static void checkLen(int dLen, int maxResultSize) {
+        if (dLen > maxResultSize) {
+            throw new IllegalArgumentException("Input array too big, the 
output array would be bigger (" +
+                    dLen +
+                    ") than the specified maximum size of " +
+                    maxResultSize);
+        }
+    }
+
+}
\ No newline at end of file

Modified: 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java?rev=1447443&r1=1447442&r2=1447443&view=diff
==============================================================================
--- 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java
 (original)
+++ 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java
 Mon Feb 18 19:14:31 2013
@@ -480,6 +480,25 @@ public class Base64 extends BaseNCodec {
         }
     }
 
+    @Override
+    public byte[] decode(final byte[] pArray) {
+        if (pArray == null || pArray.length == 0) {
+            return pArray;
+        }
+        return ApacheModifiedMiGBase64.decode(pArray);
+    }
+
+    @Override
+    public byte[] encode(final byte[] pArray) {
+        if (pArray == null || pArray.length == 0) {
+            return pArray;
+        }
+        return ApacheModifiedMiGBase64.encodeToByte(
+                pArray, lineSeparator != null, isUrlSafe(), Integer.MAX_VALUE, 
lineSeparator, lineLength
+        );
+    }
+
+
     /**
      * Tests a given byte array to see if it contains only valid characters 
within the Base64 alphabet. Currently the
      * method treats whitespace as valid.
@@ -563,7 +582,9 @@ public class Base64 extends BaseNCodec {
      * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
      */
     public static String encodeBase64String(final byte[] binaryData) {
-        return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
+        return ApacheModifiedMiGBase64.encodeToString(
+                binaryData, false, false, Integer.MAX_VALUE
+        );
     }
 
     /**
@@ -589,7 +610,9 @@ public class Base64 extends BaseNCodec {
      * @since 1.4
      */
     public static String encodeBase64URLSafeString(final byte[] binaryData) {
-        return StringUtils.newStringUtf8(encodeBase64(binaryData, false, 
true));
+        return ApacheModifiedMiGBase64.encodeToString(
+                binaryData, false, true, Integer.MAX_VALUE
+        );
     }
 
     /**
@@ -656,24 +679,12 @@ public class Base64 extends BaseNCodec {
      */
     public static byte[] encodeBase64(final byte[] binaryData, final boolean 
isChunked,
                                       final boolean urlSafe, final int 
maxResultSize) {
-        if (binaryData == null || binaryData.length == 0) {
-            return binaryData;
-        }
-
-        // Create this so can use the super-class method
-        // Also ensures that the same roundings are performed by the ctor and 
the code
-        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, 
CHUNK_SEPARATOR, urlSafe);
-        final long len = b64.getEncodedLength(binaryData);
-        if (len > maxResultSize) {
-            throw new IllegalArgumentException("Input array too big, the 
output array would be bigger (" +
-                len +
-                ") than the specified maximum size of " +
-                maxResultSize);
-        }
-
-        return b64.encode(binaryData);
+        return ApacheModifiedMiGBase64.encodeToByte(
+                binaryData, isChunked, urlSafe, maxResultSize
+        );
     }
 
+
     /**
      * Decodes a Base64 String into octets
      *
@@ -683,7 +694,10 @@ public class Base64 extends BaseNCodec {
      * @since 1.4
      */
     public static byte[] decodeBase64(final String base64String) {
-        return new Base64().decode(base64String);
+        if (base64String == null) { return null; }
+        if ("".equals(base64String)) { return new byte[0]; }
+
+        return ApacheModifiedMiGBase64.decode(base64String.toCharArray());
     }
 
     /**
@@ -694,7 +708,9 @@ public class Base64 extends BaseNCodec {
      * @return Array containing decoded data.
      */
     public static byte[] decodeBase64(final byte[] base64Data) {
-        return new Base64().decode(base64Data);
+        if (base64Data == null || base64Data.length == 0) { return base64Data; 
}
+
+        return ApacheModifiedMiGBase64.decode(base64Data);
     }
 
     // Implementation of the Encoder Interface

Added: 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original?rev=1447443&view=auto
==============================================================================
--- 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
 (added)
+++ 
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
 Mon Feb 18 19:14:31 2013
@@ -0,0 +1,590 @@
+
+// Okay, it's slightly different than true original MiGBase64.java:
+// - This comment added, and the code was reformatted using default
+//   code formatting options in IntelliJ IDEA 10.5. (Jan 28th, 2013)
+
+package util;
+
+import java.util.Arrays;
+
+/**
+ * A very fast and memory efficient class to encode and decode to and from 
BASE64 in full accordance
+ * with RFC 2045.<br><br>
+ * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is 
about 10 times faster
+ * on small arrays (10 - 1000 bytes) and 2-3 times as fast on larger arrays 
(10000 - 1000000 bytes)
+ * compared to <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
+ * <p/>
+ * On byte arrays the encoder is about 20% faster than Jakarta Commons Codec 
for encode and
+ * about 50% faster for decoding large arrays. This implementation is about 
twice as fast on very small
+ * arrays (&lt; 30 bytes). If source/destination is a <code>String</code> this
+ * version is about three times as fast due to the fact that the Commons Codec 
result has to be recoded
+ * to a <code>String</code> from <code>byte[]</code>, which is very 
expensive.<br><br>
+ * <p/>
+ * This encode/decode algorithm doesn't create any temporary arrays as many 
other codecs do, it only
+ * allocates the resulting array. This produces less garbage and it is 
possible to handle arrays twice
+ * as large as algorithms that create a temporary array. (E.g. Jakarta Commons 
Codec). It is unknown
+ * whether Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary 
arrays but since performance
+ * is quite low it probably does.<br><br>
+ * <p/>
+ * The encoder produces the same output as the Sun one except that the Sun's 
encoder appends
+ * a trailing line separator if the last character isn't a pad. Unclear why 
but it only adds to the
+ * length and is probably a side effect. Both are in conformance with RFC 2045 
though.<br>
+ * Commons Codec seems to always add a trailing line separator.<br><br>
+ * <p/>
+ * <b>Note!</b>
+ * The encode/decode method pairs (types) come in three versions with the 
<b>exact</b> same algorithm and
+ * thus a lot of code redundancy. This is to not create any temporary arrays 
for transcoding to/from different
+ * format types. The methods not used can simply be commented out.<br><br>
+ * <p/>
+ * There is also a "fast" version of all decode methods that works the same 
way as the normal ones, but
+ * has a few demands on the decoded input. Normally though, these fast versions 
should be used if the source of
+ * the input is known and it hasn't been tampered with.<br><br>
+ * <p/>
+ * If you find the code useful or you find a bug, please send me a note at 
base64 @ miginfocom . com.
+ * <p/>
+ * Licence (BSD):
+ * ==============
+ * <p/>
+ * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
+ * All rights reserved.
+ * <p/>
+ * Redistribution and use in source and binary forms, with or without 
modification,
+ * are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this 
list
+ * of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, 
this
+ * list of conditions and the following disclaimer in the documentation and/or 
other
+ * materials provided with the distribution.
+ * Neither the name of the MiG InfoCom AB nor the names of its contributors 
may be
+ * used to endorse or promote products derived from this software without 
specific
+ * prior written permission.
+ * <p/>
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY 
DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * @author Mikael Grev
+ *         Date: 2004-aug-02
+ *         Time: 11:31:11
+ * @version 2.2
+ */
+
+public class MiGBase64 {
+    private static final char[] CA = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
+    private static final int[] IA = new int[256];
+
+    static {
+        Arrays.fill(IA, -1);
+        for (int i = 0, iS = CA.length; i < iS; i++)
+            IA[CA[i]] = i;
+        IA['='] = 0;
+    }
+
+    // 
****************************************************************************************
+    // *  char[] version
+    // 
****************************************************************************************
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>char[]</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr    The bytes to convert. If <code>null</code> or length 0 
an empty array will be returned.
+     * @param lineSep Optional "\r\n" after 76 characters, unless end of 
file.<br>
+     *                No line separator will be in breach of RFC 2045 which 
specifies max 76 per line but will be a
+     *                little faster.
+     * @return A BASE64 encoded array. Never <code>null</code>.
+     */
+    public final static char[] encodeToChar(byte[] sArr, boolean lineSep) {
+        // Check special case
+        int sLen = sArr != null ? sArr.length : 0;
+        if (sLen == 0)
+            return new char[0];
+
+        int eLen = (sLen / 3) * 3;              // Length of even 24-bits.
+        int cCnt = ((sLen - 1) / 3 + 1) << 2;   // Returned character count
+        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of 
returned array
+        char[] dArr = new char[dLen];
+
+        // Encode even 24-bits
+        for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+            // Copy next three bytes into lower 24 bits of int, paying 
attention to sign.
+            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | 
(sArr[s++] & 0xff);
+
+            // Encode the int into four chars
+            dArr[d++] = CA[(i >>> 18) & 0x3f];
+            dArr[d++] = CA[(i >>> 12) & 0x3f];
+            dArr[d++] = CA[(i >>> 6) & 0x3f];
+            dArr[d++] = CA[i & 0x3f];
+
+            // Add optional line separator
+            if (lineSep && ++cc == 19 && d < dLen - 2) {
+                dArr[d++] = '\r';
+                dArr[d++] = '\n';
+                cc = 0;
+            }
+        }
+
+        // Pad and encode last bits if source isn't even 24 bits.
+        int left = sLen - eLen; // 0 - 2.
+        if (left > 0) {
+            // Prepare the int
+            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 
1] & 0xff) << 2) : 0);
+
+            // Set last four chars
+            dArr[dLen - 4] = CA[i >> 12];
+            dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
+            dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
+            dArr[dLen - 1] = '=';
+        }
+        return dArr;
+    }
+
+    /**
+     * Decodes a BASE64 encoded char array. All illegal characters will be 
ignored and can handle both arrays with
+     * and without line separators.
+     *
+     * @param sArr The source array. <code>null</code> or length 0 will return 
an empty array.
+     * @return The decoded array of bytes. May be of length 0. Will be 
<code>null</code> if the number of legal characters
+     *         (including '=') isn't divisible by 4.  (I.e. definitely 
corrupted).
+     */
+    public final static byte[] decode(char[] sArr) {
+        // Check special case
+        int sLen = sArr != null ? sArr.length : 0;
+        if (sLen == 0)
+            return new byte[0];
+
+        // Count illegal characters (including '\r', '\n') to know what size 
the returned array will be,
+        // so we don't have to reallocate & copy it later.
+        int sepCnt = 0; // Number of separator characters. (Actually illegal 
characters, but that's a bonus...)
+        for (int i = 0; i < sLen; i++)  // If input is "pure" (I.e. no line 
separators or illegal chars) base64 this loop can be commented out.
+            if (IA[sArr[i]] < 0)
+                sepCnt++;
+
+        // Check so that legal chars (including '=') are evenly divisible by 
4 as specified in RFC 2045.
+        if ((sLen - sepCnt) % 4 != 0)
+            return null;
+
+        int pad = 0;
+        for (int i = sLen; i > 1 && IA[sArr[--i]] <= 0; )
+            if (sArr[i] == '=')
+                pad++;
+
+        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        for (int s = 0, d = 0; d < len; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = 0;
+            for (int j = 0; j < 4; j++) {   // j only increased if a valid 
char was found.
+                int c = IA[sArr[s++]];
+                if (c >= 0)
+                    i |= c << (18 - j * 6);
+                else
+                    j--;
+            }
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            if (d < len) {
+                dArr[d++] = (byte) (i >> 8);
+                if (d < len)
+                    dArr[d++] = (byte) i;
+            }
+        }
+        return dArr;
+    }
+
+    /**
+     * Decodes a BASE64 encoded char array that is known to be reasonably well 
formatted. The method is about twice as
+     * fast as {@link #decode(char[])}. The preconditions are:<br>
+     * + The array must have a line length of 76 chars OR no line separators 
at all (one line).<br>
+     * + Line separator must be "\r\n", as specified in RFC 2045<br>
+     * + The array must not contain illegal characters within the encoded 
string<br>
+     * + The array CAN have illegal characters at the beginning and end, those 
will be dealt with appropriately.<br>
+     *
+     * @param sArr The source array. Length 0 will return an empty array. 
<code>null</code> will throw an exception.
+     * @return The decoded array of bytes. May be of length 0.
+     */
+    public final static byte[] decodeFast(char[] sArr) {
+        // Check special case
+        int sLen = sArr.length;
+        if (sLen == 0)
+            return new byte[0];
+
+        int sIx = 0, eIx = sLen - 1;    // Start and end index after trimming.
+
+        // Trim illegal chars from start
+        while (sIx < eIx && IA[sArr[sIx]] < 0)
+            sIx++;
+
+        // Trim illegal chars from end
+        while (eIx > 0 && IA[sArr[eIx]] < 0)
+            eIx--;
+
+        // get the padding count (=) (0, 1 or 2)
+        int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0;  // 
Count '=' at end.
+        int cCnt = eIx - sIx + 1;   // Content count including possible 
separators
+        int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
+
+        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded 
bytes
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        // Decode all but the last 0 - 2 bytes.
+        int d = 0;
+        for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12 | 
IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
+
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            dArr[d++] = (byte) (i >> 8);
+            dArr[d++] = (byte) i;
+
+            // If line separator, jump over it.
+            if (sepCnt > 0 && ++cc == 19) {
+                sIx += 2;
+                cc = 0;
+            }
+        }
+
+        if (d < len) {
+            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+            int i = 0;
+            for (int j = 0; sIx <= eIx - pad; j++)
+                i |= IA[sArr[sIx++]] << (18 - j * 6);
+
+            for (int r = 16; d < len; r -= 8)
+                dArr[d++] = (byte) (i >> r);
+        }
+
+        return dArr;
+    }
+
+    // 
****************************************************************************************
+    // *  byte[] version
+    // 
****************************************************************************************
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>byte[]</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr    The bytes to convert. If <code>null</code> or length 0 
an empty array will be returned.
+     * @param lineSep Optional "\r\n" after 76 characters, unless end of 
file.<br>
+     *                No line separator will be in breach of RFC 2045 which 
specifies max 76 per line but will be a
+     *                little faster.
+     * @return A BASE64 encoded array. Never <code>null</code>.
+     */
+    public final static byte[] encodeToByte(byte[] sArr, boolean lineSep) {
+        // Check special case
+        int sLen = sArr != null ? sArr.length : 0;
+        if (sLen == 0)
+            return new byte[0];
+
+        int eLen = (sLen / 3) * 3;                              // Length of 
even 24-bits.
+        int cCnt = ((sLen - 1) / 3 + 1) << 2;                   // Returned 
character count
+        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of 
returned array
+        byte[] dArr = new byte[dLen];
+
+        // Encode even 24-bits
+        for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+            // Copy next three bytes into lower 24 bits of int, paying 
attention to sign.
+            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | 
(sArr[s++] & 0xff);
+
+            // Encode the int into four chars
+            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
+            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
+            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
+            dArr[d++] = (byte) CA[i & 0x3f];
+
+            // Add optional line separator
+            if (lineSep && ++cc == 19 && d < dLen - 2) {
+                dArr[d++] = '\r';
+                dArr[d++] = '\n';
+                cc = 0;
+            }
+        }
+
+        // Pad and encode last bits if source isn't an even 24 bits.
+        int left = sLen - eLen; // 0 - 2.
+        if (left > 0) {
+            // Prepare the int
+            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 
1] & 0xff) << 2) : 0);
+
+            // Set last four chars
+            dArr[dLen - 4] = (byte) CA[i >> 12];
+            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
+            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
+            dArr[dLen - 1] = '=';
+        }
+        return dArr;
+    }
+
+    /**
+     * Decodes a BASE64 encoded byte array. All illegal characters will be 
ignored and can handle both arrays with
+     * and without line separators.
+     *
+     * @param sArr The source array. Length 0 will return an empty array. 
<code>null</code> will throw an exception.
+     * @return The decoded array of bytes. May be of length 0. Will be 
<code>null</code> if the number of legal characters
+     *         (including '=') isn't divisible by 4. (I.e. definitely 
corrupted).
+     */
+    public final static byte[] decode(byte[] sArr) {
+        // Check special case
+        int sLen = sArr.length;
+
+        // Count illegal characters (including '\r', '\n') to know what size 
the returned array will be,
+        // so we don't have to reallocate & copy it later.
+        int sepCnt = 0; // Number of separator characters. (Actually illegal 
characters, but that's a bonus...)
+        for (int i = 0; i < sLen; i++)      // If input is "pure" (I.e. no 
line separators or illegal chars) base64 this loop can be commented out.
+            if (IA[sArr[i] & 0xff] < 0)
+                sepCnt++;
+
+        // Check so that legal chars (including '=') are evenly divisible by 
4 as specified in RFC 2045.
+        if ((sLen - sepCnt) % 4 != 0)
+            return null;
+
+        int pad = 0;
+        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0; )
+            if (sArr[i] == '=')
+                pad++;
+
+        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        for (int s = 0, d = 0; d < len; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = 0;
+            for (int j = 0; j < 4; j++) {   // j only increased if a valid 
char was found.
+                int c = IA[sArr[s++] & 0xff];
+                if (c >= 0)
+                    i |= c << (18 - j * 6);
+                else
+                    j--;
+            }
+
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            if (d < len) {
+                dArr[d++] = (byte) (i >> 8);
+                if (d < len)
+                    dArr[d++] = (byte) i;
+            }
+        }
+
+        return dArr;
+    }
+
+
+    /**
+     * Decodes a BASE64 encoded byte array that is known to be reasonably well 
formatted. The method is about twice as
+     * fast as {@link #decode(byte[])}. The preconditions are:<br>
+     * + The array must have a line length of 76 chars OR no line separators 
at all (one line).<br>
+     * + Line separator must be "\r\n", as specified in RFC 2045<br>
+     * + The array must not contain illegal characters within the encoded 
string<br>
+     * + The array CAN have illegal characters at the beginning and end, those 
will be dealt with appropriately.<br>
+     *
+     * @param sArr The source array. Length 0 will return an empty array. 
<code>null</code> will throw an exception.
+     * @return The decoded array of bytes. May be of length 0.
+     */
+    public final static byte[] decodeFast(byte[] sArr) {
+        // Check special case
+        int sLen = sArr.length;
+        if (sLen == 0)
+            return new byte[0];
+
+        int sIx = 0, eIx = sLen - 1;    // Start and end index after trimming.
+
+        // Trim illegal chars from start
+        while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
+            sIx++;
+
+        // Trim illegal chars from end
+        while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
+            eIx--;
+
+        // get the padding count (=) (0, 1 or 2)
+        int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0;  // 
Count '=' at end.
+        int cCnt = eIx - sIx + 1;   // Content count including possible 
separators
+        int sepCnt = sLen > 76 ? (sArr[76] == '\r' ? cCnt / 78 : 0) << 1 : 0;
+
+        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded 
bytes
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        // Decode all but the last 0 - 2 bytes.
+        int d = 0;
+        for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12 | 
IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
+
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            dArr[d++] = (byte) (i >> 8);
+            dArr[d++] = (byte) i;
+
+            // If line separator, jump over it.
+            if (sepCnt > 0 && ++cc == 19) {
+                sIx += 2;
+                cc = 0;
+            }
+        }
+
+        if (d < len) {
+            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+            int i = 0;
+            for (int j = 0; sIx <= eIx - pad; j++)
+                i |= IA[sArr[sIx++]] << (18 - j * 6);
+
+            for (int r = 16; d < len; r -= 8)
+                dArr[d++] = (byte) (i >> r);
+        }
+
+        return dArr;
+    }
+
+    // 
****************************************************************************************
+    // * String version
+    // 
****************************************************************************************
+
+    /**
+     * Encodes a raw byte array into a BASE64 <code>String</code> 
representation in accordance with RFC 2045.
+     *
+     * @param sArr    The bytes to convert. If <code>null</code> or length 0 
an empty string will be returned.
+     * @param lineSep Optional "\r\n" after 76 characters, unless end of 
file.<br>
+     *                No line separator will be in breach of RFC 2045 which 
specifies max 76 per line but will be a
+     *                little faster.
+     * @return A BASE64 encoded string. Never <code>null</code>.
+     */
+    public final static String encodeToString(byte[] sArr, boolean lineSep) {
+        // Reuse char[] since we can't create a String incrementally anyway 
and StringBuffer/Builder would be slower.
+        return new String(encodeToChar(sArr, lineSep));
+    }
+
+    /**
+     * Decodes a BASE64 encoded <code>String</code>. All illegal characters 
will be ignored and can handle both strings with
+     * and without line separators.<br>
+     * <b>Note!</b> It can be up to about 2x the speed to call 
<code>decode(str.toCharArray())</code> instead. That
+     * will create a temporary array though. This version will use 
<code>str.charAt(i)</code> to iterate the string.
+     *
+     * @param str The source string. <code>null</code> or length 0 will return 
an empty array.
+     * @return The decoded array of bytes. May be of length 0. Will be 
<code>null</code> if the number of legal characters
+     *         (including '=') isn't divisible by 4.  (I.e. definitely 
corrupted).
+     */
+    public final static byte[] decode(String str) {
+        // Check special case
+        int sLen = str != null ? str.length() : 0;
+        if (sLen == 0)
+            return new byte[0];
+
+        // Count illegal characters (including '\r', '\n') to know what size 
the returned array will be,
+        // so we don't have to reallocate & copy it later.
+        int sepCnt = 0; // Number of separator characters. (Actually illegal 
characters, but that's a bonus...)
+        for (int i = 0; i < sLen; i++)  // If input is "pure" (I.e. no line 
separators or illegal chars) base64 this loop can be commented out.
+            if (IA[str.charAt(i)] < 0)
+                sepCnt++;
+
+        // Check so that legal chars (including '=') are evenly divisible by 
4 as specified in RFC 2045.
+        if ((sLen - sepCnt) % 4 != 0)
+            return null;
+
+        // Count '=' at end
+        int pad = 0;
+        for (int i = sLen; i > 1 && IA[str.charAt(--i)] <= 0; )
+            if (str.charAt(i) == '=')
+                pad++;
+
+        int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        for (int s = 0, d = 0; d < len; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = 0;
+            for (int j = 0; j < 4; j++) {   // j only increased if a valid 
char was found.
+                int c = IA[str.charAt(s++)];
+                if (c >= 0)
+                    i |= c << (18 - j * 6);
+                else
+                    j--;
+            }
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            if (d < len) {
+                dArr[d++] = (byte) (i >> 8);
+                if (d < len)
+                    dArr[d++] = (byte) i;
+            }
+        }
+        return dArr;
+    }
+
+    /**
+     * Decodes a BASE64 encoded string that is known to be reasonably well 
formatted. The method is about twice as
+     * fast as {@link #decode(String)}. The preconditions are:<br>
+     * + The string must have a line length of 76 chars OR no line separators 
at all (one line).<br>
+     * + Line separator must be "\r\n", as specified in RFC 2045<br>
+     * + The string must not contain illegal characters within the encoded 
string<br>
+     * + The string CAN have illegal characters at the beginning and end, those 
will be dealt with appropriately.<br>
+     *
+     * @param s The source string. Length 0 will return an empty array. 
<code>null</code> will throw an exception.
+     * @return The decoded array of bytes. May be of length 0.
+     */
+    public final static byte[] decodeFast(String s) {
+        // Check special case
+        int sLen = s.length();
+        if (sLen == 0)
+            return new byte[0];
+
+        int sIx = 0, eIx = sLen - 1;    // Start and end index after trimming.
+
+        // Trim illegal chars from start
+        while (sIx < eIx && IA[s.charAt(sIx) & 0xff] < 0)
+            sIx++;
+
+        // Trim illegal chars from end
+        while (eIx > 0 && IA[s.charAt(eIx) & 0xff] < 0)
+            eIx--;
+
+        // get the padding count (=) (0, 1 or 2)
+        int pad = s.charAt(eIx) == '=' ? (s.charAt(eIx - 1) == '=' ? 2 : 1) : 
0;  // Count '=' at end.
+        int cCnt = eIx - sIx + 1;   // Content count including possible 
separators
+        int sepCnt = sLen > 76 ? (s.charAt(76) == '\r' ? cCnt / 78 : 0) << 1 : 
0;
+
+        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded 
bytes
+        byte[] dArr = new byte[len];       // Preallocate byte[] of exact 
length
+
+        // Decode all but the last 0 - 2 bytes.
+        int d = 0;
+        for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+            // Assemble three bytes into an int from four "valid" characters.
+            int i = IA[s.charAt(sIx++)] << 18 | IA[s.charAt(sIx++)] << 12 | 
IA[s.charAt(sIx++)] << 6 | IA[s.charAt(sIx++)];
+
+            // Add the bytes
+            dArr[d++] = (byte) (i >> 16);
+            dArr[d++] = (byte) (i >> 8);
+            dArr[d++] = (byte) i;
+
+            // If line separator, jump over it.
+            if (sepCnt > 0 && ++cc == 19) {
+                sIx += 2;
+                cc = 0;
+            }
+        }
+
+        if (d < len) {
+            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+            int i = 0;
+            for (int j = 0; sIx <= eIx - pad; j++)
+                i |= IA[s.charAt(sIx++)] << (18 - j * 6);
+
+            for (int r = 16; d < len; r -= 8)
+                dArr[d++] = (byte) (i >> r);
+        }
+
+        return dArr;
+    }
+}
\ No newline at end of file

svn commit: r1447443 - in /commons/proper/codec/trunk/src: changes/ main/java/org/apache/commons/codec/binary/

Reply via email to