Added: 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UCSReader.java
URL: 
http://svn.apache.org/viewvc/struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UCSReader.java?rev=800614&view=auto
==============================================================================
--- 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UCSReader.java
 (added)
+++ 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UCSReader.java
 Mon Aug  3 23:16:50 2009
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.struts2.jasper.xmlparser;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.Reader;
+
+/** 
+ * Reader for UCS-2 and UCS-4 encodings.
+ * (i.e., encodings from ISO-10646-UCS-(2|4)).
+ *
+ * @author Neil Graham, IBM
+ *
+ * @version $Id: UCSReader.java 466606 2006-10-21 23:07:12Z markt $
+ */
+public class UCSReader extends Reader {
+
+    private org.apache.commons.logging.Log log=
+        org.apache.commons.logging.LogFactory.getLog( UCSReader.class );
+    
+    //
+    // Constants
+    //
+
+    /** Default byte buffer size (8192, larger than that of ASCIIReader
+     * since it's reasonable to surmise that the average UCS-4-encoded
+     * file should be 4 times as large as the average ASCII-encoded file). 
+     */
+    public static final int DEFAULT_BUFFER_SIZE = 8192;
+
+    public static final short UCS2LE = 1;
+    public static final short UCS2BE = 2;
+    public static final short UCS4LE = 4;
+    public static final short UCS4BE = 8;
+
+    //
+    // Data
+    //
+
+    /** Input stream. */
+    protected InputStream fInputStream;
+
+    /** Byte buffer. */
+    protected byte[] fBuffer;
+
+    // what kind of data we're dealing with
+    protected short fEncoding;
+
+    //
+    // Constructors
+    //
+
+    /** 
+     * Constructs an ASCII reader from the specified input stream 
+     * using the default buffer size.  The Endian-ness and whether this is
+     * UCS-2 or UCS-4 needs also to be known in advance.
+     *
+     * @param inputStream The input stream.
+     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
+     */
+    public UCSReader(InputStream inputStream, short encoding) {
+        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
+    } // <init>(InputStream, short)
+
+    /** 
+     * Constructs an ASCII reader from the specified input stream 
+     * and buffer size.  The Endian-ness and whether this is
+     * UCS-2 or UCS-4 needs also to be known in advance.
+     *
+     * @param inputStream The input stream.
+     * @param size        The initial buffer size.
+     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
+     */
+    public UCSReader(InputStream inputStream, int size, short encoding) {
+        fInputStream = inputStream;
+        fBuffer = new byte[size];
+        fEncoding = encoding;
+    } // <init>(InputStream,int,short)
+
+    //
+    // Reader methods
+    //
+
+    /**
+     * Read a single character.  This method will block until a character is
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * <p> Subclasses that intend to support efficient single-character input
+     * should override this method.
+     *
+     * @return     The character read, as an integer in the range 0 to 127
+     *             (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has
+     *             been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read() throws IOException { 
+        int b0 = fInputStream.read() & 0xff;
+        if (b0 == 0xff)
+            return -1;
+        int b1 = fInputStream.read() & 0xff;
+        if (b1 == 0xff)
+            return -1;
+        if(fEncoding >=4) {
+            int b2 = fInputStream.read() & 0xff;
+            if (b2 == 0xff)
+                return -1;
+            int b3 = fInputStream.read() & 0xff;
+            if (b3 == 0xff)
+                return -1;
+            if (log.isDebugEnabled())
+                log.debug("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 
" + (b2 & 0xff) + " b3 " + (b3 & 0xff));
+            if (fEncoding == UCS4BE)
+                return (b0<<24)+(b1<<16)+(b2<<8)+b3;
+            else
+                return (b3<<24)+(b2<<16)+(b1<<8)+b0;
+        } else { // UCS-2
+            if (fEncoding == UCS2BE)
+                return (b0<<8)+b1;
+            else
+                return (b1<<8)+b0;
+        }
+    } // read():int
+
+    /**
+     * Read characters into a portion of an array.  This method will block
+     * until some input is available, an I/O error occurs, or the end of the
+     * stream is reached.
+     *
+     * @param      ch     Destination buffer
+     * @param      offset Offset at which to start storing characters
+     * @param      length Maximum number of characters to read
+     *
+     * @return     The number of characters read, or -1 if the end of the
+     *             stream has been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read(char ch[], int offset, int length) throws IOException {
+        int byteLength = length << ((fEncoding >= 4)?2:1);
+        if (byteLength > fBuffer.length) {
+            byteLength = fBuffer.length;
+        }
+        int count = fInputStream.read(fBuffer, 0, byteLength);
+        if(count == -1) return -1;
+        // try and make count be a multiple of the number of bytes we're 
looking for
+        if(fEncoding >= 4) { // BigEndian
+            // this looks ugly, but it avoids an if at any rate...
+            int numToRead = (4 - (count & 3) & 3);
+            for(int i=0; i<numToRead; i++) {
+                int charRead = fInputStream.read();
+                if(charRead == -1) { // end of input; something likely went 
wrong!A  Pad buffer with nulls.
+                    for (int j = i;j<numToRead; j++)
+                        fBuffer[count+j] = 0;
+                    break;
+                } else {
+                    fBuffer[count+i] = (byte)charRead; 
+                }
+            }
+            count += numToRead;
+        } else {
+            int numToRead = count & 1;
+            if(numToRead != 0) {
+                count++;
+                int charRead = fInputStream.read();
+                if(charRead == -1) { // end of input; something likely went 
wrong!A  Pad buffer with nulls.
+                    fBuffer[count] = 0;
+                } else {
+                    fBuffer[count] = (byte)charRead;
+                }
+            }
+        }
+
+        // now count is a multiple of the right number of bytes
+        int numChars = count >> ((fEncoding >= 4)?2:1);
+        int curPos = 0;
+        for (int i = 0; i < numChars; i++) {
+            int b0 = fBuffer[curPos++] & 0xff;
+            int b1 = fBuffer[curPos++] & 0xff;
+            if(fEncoding >=4) {
+                int b2 = fBuffer[curPos++] & 0xff;
+                int b3 = fBuffer[curPos++] & 0xff;
+                if (fEncoding == UCS4BE)
+                    ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3);
+                else
+                    ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0);
+            } else { // UCS-2
+                if (fEncoding == UCS2BE)
+                    ch[offset+i] = (char)((b0<<8)+b1);
+                else
+                    ch[offset+i] = (char)((b1<<8)+b0);
+            }
+        }
+        return numChars;
+    } // read(char[],int,int)
+
+    /**
+     * Skip characters.  This method will block until some characters are
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * @param  n  The number of characters to skip
+     *
+     * @return    The number of characters actually skipped
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public long skip(long n) throws IOException {
+        // charWidth will represent the number of bits to move
+        // n leftward to get num of bytes to skip, and then move the result 
rightward
+        // to get num of chars effectively skipped.
+        // The trick with &'ing, as with elsewhere in this dcode, is
+        // intended to avoid an expensive use of / that might not be optimized
+        // away.
+        int charWidth = (fEncoding >=4)?2:1;
+        long bytesSkipped = fInputStream.skip(n<<charWidth);
+        if((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> 
charWidth;
+        return (bytesSkipped >> charWidth) + 1;
+    } // skip(long):long
+
+    /**
+     * Tell whether this stream is ready to be read.
+     *
+     * @return True if the next read() is guaranteed not to block for input,
+     * false otherwise.  Note that returning false does not guarantee that the
+     * next read will block.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public boolean ready() throws IOException {
+       return false;
+    } // ready()
+
+    /**
+     * Tell whether this stream supports the mark() operation.
+     */
+    public boolean markSupported() {
+       return fInputStream.markSupported();
+    } // markSupported()
+
+    /**
+     * Mark the present position in the stream.  Subsequent calls to reset()
+     * will attempt to reposition the stream to this point.  Not all
+     * character-input streams support the mark() operation.
+     *
+     * @param  readAheadLimit  Limit on the number of characters that may be
+     *                         read while still preserving the mark.  After
+     *                         reading this many characters, attempting to
+     *                         reset the stream may fail.
+     *
+     * @exception  IOException  If the stream does not support mark(),
+     *                          or if some other I/O error occurs
+     */
+    public void mark(int readAheadLimit) throws IOException {
+       fInputStream.mark(readAheadLimit);
+    } // mark(int)
+
+    /**
+     * Reset the stream.  If the stream has been marked, then attempt to
+     * reposition it at the mark.  If the stream has not been marked, then
+     * attempt to reset it in some way appropriate to the particular stream,
+     * for example by repositioning it to its starting point.  Not all
+     * character-input streams support the reset() operation, and some support
+     * reset() without supporting mark().
+     *
+     * @exception  IOException  If the stream has not been marked,
+     *                          or if the mark has been invalidated,
+     *                          or if the stream does not support reset(),
+     *                          or if some other I/O error occurs
+     */
+    public void reset() throws IOException {
+        fInputStream.reset();
+    } // reset()
+
+    /**
+     * Close the stream.  Once a stream has been closed, further read(),
+     * ready(), mark(), or reset() invocations will throw an IOException.
+     * Closing a previously-closed stream, however, has no effect.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+     public void close() throws IOException {
+         fInputStream.close();
+     } // close()
+
+} // class UCSReader

Added: 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UTF8Reader.java
URL: 
http://svn.apache.org/viewvc/struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UTF8Reader.java?rev=800614&view=auto
==============================================================================
--- 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UTF8Reader.java
 (added)
+++ 
struts/sandbox/trunk/struts2-jsp-plugin/src/main/java/org/apache/struts2/jasper/xmlparser/UTF8Reader.java
 Mon Aug  3 23:16:50 2009
@@ -0,0 +1,635 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.struts2.jasper.xmlparser;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.UTFDataFormatException;
+import org.apache.struts2.jasper.compiler.Localizer;
+
+/**
+ * @author Andy Clark, IBM
+ *
+ * @version $Id: UTF8Reader.java 466606 2006-10-21 23:07:12Z markt $
+ */
+public class UTF8Reader
+    extends Reader {
+
+    private org.apache.commons.logging.Log log=
+        org.apache.commons.logging.LogFactory.getLog( UTF8Reader.class );
+    
+    //
+    // Constants
+    //
+
+    /** Default byte buffer size (2048). */
+    public static final int DEFAULT_BUFFER_SIZE = 2048;
+
+    // debugging
+
+    /** Debug read. */
+    private static final boolean DEBUG_READ = false;
+
+    //
+    // Data
+    //
+
+    /** Input stream. */
+    protected InputStream fInputStream;
+
+    /** Byte buffer. */
+    protected byte[] fBuffer;
+
+    /** Offset into buffer. */
+    protected int fOffset;
+
+    /** Surrogate character. */
+    private int fSurrogate = -1;
+
+    //
+    // Constructors
+    //
+
+    /** 
+     * Constructs a UTF-8 reader from the specified input stream, 
+     * buffer size and MessageFormatter.
+     *
+     * @param inputStream The input stream.
+     * @param size        The initial buffer size.
+     */
+    public UTF8Reader(InputStream inputStream, int size) {
+        fInputStream = inputStream;
+        fBuffer = new byte[size];
+    }
+
+    //
+    // Reader methods
+    //
+
+    /**
+     * Read a single character.  This method will block until a character is
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * <p> Subclasses that intend to support efficient single-character input
+     * should override this method.
+     *
+     * @return     The character read, as an integer in the range 0 to 16383
+     *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
+     *             been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read() throws IOException {
+
+        // decode character
+        int c = fSurrogate;
+        if (fSurrogate == -1) {
+            // NOTE: We use the index into the buffer if there are remaining
+            //       bytes from the last block read. -Ac
+            int index = 0;
+
+            // get first byte
+            int b0 = index == fOffset 
+                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+            if (b0 == -1) {
+                return -1;
+            }
+
+            // UTF-8:   [0xxx xxxx]
+            // Unicode: [0000 0000] [0xxx xxxx]
+            if (b0 < 0x80) {
+                c = (char)b0;
+            }
+
+            // UTF-8:   [110y yyyy] [10xx xxxx]
+            // Unicode: [0000 0yyy] [yyxx xxxx]
+            else if ((b0 & 0xE0) == 0xC0) {
+                int b1 = index == fOffset 
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 2);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 2, b1);
+                }
+                c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
+            }
+
+            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
+            // Unicode: [zzzz yyyy] [yyxx xxxx]
+            else if ((b0 & 0xF0) == 0xE0) {
+                int b1 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 3);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 3, b1);
+                }
+                int b2 = index == fOffset 
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b2 == -1) {
+                    expectedByte(3, 3);
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    invalidByte(3, 3, b2);
+                }
+                c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
+                    (b2 & 0x003F);
+            }
+
+            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+            //          [1101 11yy] [yyxx xxxx] (low surrogate)
+            //          * uuuuu = wwww + 1
+            else if ((b0 & 0xF8) == 0xF0) {
+                int b1 = index == fOffset 
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 4);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 3, b1);
+                }
+                int b2 = index == fOffset 
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b2 == -1) {
+                    expectedByte(3, 4);
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    invalidByte(3, 3, b2);
+                }
+                int b3 = index == fOffset 
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b3 == -1) {
+                    expectedByte(4, 4);
+                }
+                if ((b3 & 0xC0) != 0x80) {
+                    invalidByte(4, 4, b3);
+                }
+                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
+                if (uuuuu > 0x10) {
+                    invalidSurrogate(uuuuu);
+                }
+                int wwww = uuuuu - 1;
+                int hs = 0xD800 | 
+                         ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) | 
+                         ((b2 >> 4) & 0x0003);
+                int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
+                c = hs;
+                fSurrogate = ls;
+            }
+
+            // error
+            else {
+                invalidByte(1, 1, b0);
+            }
+        }
+
+        // use surrogate
+        else {
+            fSurrogate = -1;
+        }
+
+        // return character
+        if (DEBUG_READ) {
+            if (log.isDebugEnabled())
+                log.debug("read(): 0x"+Integer.toHexString(c));
+        }
+        return c;
+
+    } // read():int
+
+    /**
+     * Read characters into a portion of an array.  This method will block
+     * until some input is available, an I/O error occurs, or the end of the
+     * stream is reached.
+     *
+     * @param      ch     Destination buffer
+     * @param      offset Offset at which to start storing characters
+     * @param      length Maximum number of characters to read
+     *
+     * @return     The number of characters read, or -1 if the end of the
+     *             stream has been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public int read(char ch[], int offset, int length) throws IOException {
+
+        // handle surrogate
+        int out = offset;
+        if (fSurrogate != -1) {
+            ch[offset + 1] = (char)fSurrogate;
+            fSurrogate = -1;
+            length--;
+            out++;
+        }
+
+        // read bytes
+        int count = 0;
+        if (fOffset == 0) {
+            // adjust length to read
+            if (length > fBuffer.length) {
+                length = fBuffer.length;
+            }
+
+            // perform read operation
+            count = fInputStream.read(fBuffer, 0, length);
+            if (count == -1) {
+                return -1;
+            }
+            count += out - offset;
+        }
+
+        // skip read; last character was in error
+        // NOTE: Having an offset value other than zero means that there was
+        //       an error in the last character read. In this case, we have
+        //       skipped the read so we don't consume any bytes past the 
+        //       error. By signalling the error on the next block read we
+        //       allow the method to return the most valid characters that
+        //       it can on the previous block read. -Ac
+        else {
+            count = fOffset;
+            fOffset = 0;
+        }
+
+        // convert bytes to characters
+        final int total = count;
+        for (int in = 0; in < total; in++) {
+            int b0 = fBuffer[in] & 0x00FF;
+
+            // UTF-8:   [0xxx xxxx]
+            // Unicode: [0000 0000] [0xxx xxxx]
+            if (b0 < 0x80) {
+                ch[out++] = (char)b0;
+                continue;
+            }
+
+            // UTF-8:   [110y yyyy] [10xx xxxx]
+            // Unicode: [0000 0yyy] [yyxx xxxx]
+            if ((b0 & 0xE0) == 0xC0) {
+                int b1 = -1;
+                if (++in < total) { 
+                    b1 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 2);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 2, b1);
+                }
+                int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
+                ch[out++] = (char)c;
+                count -= 1;
+                continue;
+            }
+
+            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
+            // Unicode: [zzzz yyyy] [yyxx xxxx]
+            if ((b0 & 0xF0) == 0xE0) {
+                int b1 = -1;
+                if (++in < total) { 
+                    b1 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 3);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 3, b1);
+                }
+                int b2 = -1;
+                if (++in < total) { 
+                    b2 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b2 = fInputStream.read();
+                    if (b2 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fOffset = 2;
+                            return out - offset;
+                        }
+                        expectedByte(3, 3);
+                    }
+                    count++;
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fOffset = 3;
+                        return out - offset;
+                    }
+                    invalidByte(3, 3, b2);
+                }
+                int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
+                        (b2 & 0x003F);
+                ch[out++] = (char)c;
+                count -= 2;
+                continue;
+            }
+
+            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+            //          [1101 11yy] [yyxx xxxx] (low surrogate)
+            //          * uuuuu = wwww + 1
+            if ((b0 & 0xF8) == 0xF0) {
+                int b1 = -1;
+                if (++in < total) { 
+                    b1 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 4);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 4, b1);
+                }
+                int b2 = -1;
+                if (++in < total) { 
+                    b2 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b2 = fInputStream.read();
+                    if (b2 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fOffset = 2;
+                            return out - offset;
+                        }
+                        expectedByte(3, 4);
+                    }
+                    count++;
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fOffset = 3;
+                        return out - offset;
+                    }
+                    invalidByte(3, 4, b2);
+                }
+                int b3 = -1;
+                if (++in < total) { 
+                    b3 = fBuffer[in] & 0x00FF; 
+                }
+                else {
+                    b3 = fInputStream.read();
+                    if (b3 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fBuffer[2] = (byte)b2;
+                            fOffset = 3;
+                            return out - offset;
+                        }
+                        expectedByte(4, 4);
+                    }
+                    count++;
+                }
+                if ((b3 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fBuffer[3] = (byte)b3;
+                        fOffset = 4;
+                        return out - offset;
+                    }
+                    invalidByte(4, 4, b2);
+                }
+
+                // decode bytes into surrogate characters
+                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
+                if (uuuuu > 0x10) {
+                    invalidSurrogate(uuuuu);
+                }
+                int wwww = uuuuu - 1;
+                int zzzz = b1 & 0x000F;
+                int yyyyyy = b2 & 0x003F;
+                int xxxxxx = b3 & 0x003F;
+                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | 
(yyyyyy >> 4);
+                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
+
+                // set characters
+                ch[out++] = (char)hs;
+                ch[out++] = (char)ls;
+                count -= 2;
+                continue;
+            }
+
+            // error
+            if (out > offset) {
+                fBuffer[0] = (byte)b0;
+                fOffset = 1;
+                return out - offset;
+            }
+            invalidByte(1, 1, b0);
+        }
+
+        // return number of characters converted
+        if (DEBUG_READ) {
+            if (log.isDebugEnabled())
+                log.debug("read(char[],"+offset+','+length+"): count="+count);
+        }
+        return count;
+
+    } // read(char[],int,int)
+
+    /**
+     * Skip characters.  This method will block until some characters are
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * @param  n  The number of characters to skip
+     *
+     * @return    The number of characters actually skipped
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public long skip(long n) throws IOException {
+
+        long remaining = n;
+        final char[] ch = new char[fBuffer.length];
+        do {
+            int length = ch.length < remaining ? ch.length : (int)remaining;
+            int count = read(ch, 0, length);
+            if (count > 0) {
+                remaining -= count;
+            }
+            else {
+                break;
+            }
+        } while (remaining > 0);
+
+        long skipped = n - remaining;
+        return skipped;
+
+    } // skip(long):long
+
+    /**
+     * Tell whether this stream is ready to be read.
+     *
+     * @return True if the next read() is guaranteed not to block for input,
+     * false otherwise.  Note that returning false does not guarantee that the
+     * next read will block.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public boolean ready() throws IOException {
+           return false;
+    } // ready()
+
+    /**
+     * Tell whether this stream supports the mark() operation.
+     */
+    public boolean markSupported() {
+           return false;
+    } // markSupported()
+
+    /**
+     * Mark the present position in the stream.  Subsequent calls to reset()
+     * will attempt to reposition the stream to this point.  Not all
+     * character-input streams support the mark() operation.
+     *
+     * @param  readAheadLimit  Limit on the number of characters that may be
+     *                         read while still preserving the mark.  After
+     *                         reading this many characters, attempting to
+     *                         reset the stream may fail.
+     *
+     * @exception  IOException  If the stream does not support mark(),
+     *                          or if some other I/O error occurs
+     */
+    public void mark(int readAheadLimit) throws IOException {
+       throw new IOException(
+                Localizer.getMessage("jsp.error.xml.operationNotSupported",
+                                    "mark()", "UTF-8"));
+    }
+
+    /**
+     * Reset the stream.  If the stream has been marked, then attempt to
+     * reposition it at the mark.  If the stream has not been marked, then
+     * attempt to reset it in some way appropriate to the particular stream,
+     * for example by repositioning it to its starting point.  Not all
+     * character-input streams support the reset() operation, and some support
+     * reset() without supporting mark().
+     *
+     * @exception  IOException  If the stream has not been marked,
+     *                          or if the mark has been invalidated,
+     *                          or if the stream does not support reset(),
+     *                          or if some other I/O error occurs
+     */
+    public void reset() throws IOException {
+        fOffset = 0;
+        fSurrogate = -1;
+    } // reset()
+
+    /**
+     * Close the stream.  Once a stream has been closed, further read(),
+     * ready(), mark(), or reset() invocations will throw an IOException.
+     * Closing a previously-closed stream, however, has no effect.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    public void close() throws IOException {
+        fInputStream.close();
+    } // close()
+
+    //
+    // Private methods
+    //
+
+    /** Throws an exception for expected byte. */
+    private void expectedByte(int position, int count)
+        throws UTFDataFormatException {
+
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.expectedByte",
+                                    Integer.toString(position),
+                                    Integer.toString(count)));
+
+    } // expectedByte(int,int,int)
+
+    /** Throws an exception for invalid byte. */
+    private void invalidByte(int position, int count, int c) 
+        throws UTFDataFormatException {
+
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.invalidByte",
+                                    Integer.toString(position),
+                                    Integer.toString(count)));
+    } // invalidByte(int,int,int,int)
+
+    /** Throws an exception for invalid surrogate bits. */
+    private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
+        
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.invalidHighSurrogate",
+                                    Integer.toHexString(uuuuu)));
+    } // invalidSurrogate(int)
+
+} // class UTF8Reader


Reply via email to