Author: markt
Date: Thu Feb 23 22:29:12 2012
New Revision: 1292996
URL: http://svn.apache.org/viewvc?rev=1292996&view=rev
Log:
Hooray for Apache Harmony. Copy Harmony's UTF-8 decoder, add the
additional restriction expected by the WebSocket spec and start passing
all the UTF-8 releated tests.
Added:
tomcat/trunk/java/org/apache/catalina/websocket/Utf8Decoder.java
Modified:
tomcat/trunk/java/org/apache/catalina/websocket/StreamInbound.java
Modified: tomcat/trunk/java/org/apache/catalina/websocket/StreamInbound.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/websocket/StreamInbound.java?rev=1292996&r1=1292995&r2=1292996&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/websocket/StreamInbound.java
(original)
+++ tomcat/trunk/java/org/apache/catalina/websocket/StreamInbound.java Thu Feb
23 22:29:12 2012
@@ -20,14 +20,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
-import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import org.apache.coyote.http11.upgrade.UpgradeInbound;
import org.apache.coyote.http11.upgrade.UpgradeOutbound;
import org.apache.coyote.http11.upgrade.UpgradeProcessor;
-import org.apache.tomcat.util.buf.B2CConverter;
import org.apache.tomcat.util.net.AbstractEndpoint.Handler.SocketState;
/**
@@ -83,10 +81,8 @@ public abstract class StreamInbound impl
onBinaryData(wsIs);
return SocketState.UPGRADED;
} else if (opCode == Constants.OPCODE_TEXT) {
- InputStreamReader r = new InputStreamReader(wsIs,
- B2CConverter.UTF_8.newDecoder()
- .onMalformedInput(CodingErrorAction.REPORT)
- .onUnmappableCharacter(CodingErrorAction.REPORT));
+ InputStreamReader r =
+ new InputStreamReader(wsIs, new Utf8Decoder());
onTextData(r);
return SocketState.UPGRADED;
}
Added: tomcat/trunk/java/org/apache/catalina/websocket/Utf8Decoder.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/websocket/Utf8Decoder.java?rev=1292996&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/websocket/Utf8Decoder.java (added)
+++ tomcat/trunk/java/org/apache/catalina/websocket/Utf8Decoder.java Thu Feb 23
22:29:12 2012
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.websocket;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+import org.apache.tomcat.util.buf.B2CConverter;
+
+/**
+ * Decodes bytes to UTF-8. Extracted from Apache Harmony and modified to reject
+ * code points from U+D800 to U+DFFF as per RFC3629. The standard Java decoder
+ * does not reject these.
+ */
+public class Utf8Decoder extends CharsetDecoder {
+
+ // The next table contains information about UTF-8 charset and
+ // correspondence of 1st byte to the length of sequence
+ // For information please visit http://www.ietf.org/rfc/rfc3629.txt
+ //
+ // Please note, o means 0, actually.
+ // -------------------------------------------------------------------
+ // 0 1 2 3 Value
+ // -------------------------------------------------------------------
+ // oxxxxxxx 00000000 00000000 0xxxxxxx
+ // 11oyyyyy 1oxxxxxx 00000000 00000yyy yyxxxxxx
+ // 111ozzzz 1oyyyyyy 1oxxxxxx 00000000 zzzzyyyy yyxxxxxx
+ // 1111ouuu 1ouuzzzz 1oyyyyyy 1oxxxxxx 000uuuuu zzzzyyyy yyxxxxxx
+
+ private static final int remainingBytes[] = {
+ // 1owwwwww
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ // 11oyyyyy
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // 111ozzzz
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ // 1111ouuu
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ // > 11110111
+ -1, -1, -1, -1, -1, -1, -1, -1 };
+
+ private static final int remainingNumbers[] = {
+ 0, // 0 1 2
3
+ 4224, // (01o00000b << 6)+(1o000000b)
+ 401536, // (011o0000b << 12)+(1o000000b << 6)+(1o000000b)
+ 29892736 // (0111o000b << 18)+(1o000000b << 12)+(1o000000b <<
6)+(1o000000b)
+ };
+
+ private static final int lowerEncodingLimit[] = { -1, 0x80, 0x800, 0x10000
};
+
+ public Utf8Decoder() {
+ super(B2CConverter.UTF_8, 1.0f, 1.0f);
+ }
+
+ @Override
+ protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ if (in.hasArray() && out.hasArray()) {
+ return decodeHasArray(in, out);
+ }
+ return decodeNotHasArray(in, out);
+ }
+
+ private CoderResult decodeNotHasArray(ByteBuffer in, CharBuffer out) {
+ int outRemaining = out.remaining();
+ int pos = in.position();
+ int limit = in.limit();
+ try {
+ while (pos < limit) {
+ if (outRemaining == 0) {
+ return CoderResult.OVERFLOW;
+ }
+
+ int jchar = in.get();
+ if (jchar < 0) {
+ jchar = jchar & 0x7F;
+ int tail = remainingBytes[jchar];
+ if (tail == -1) {
+ return CoderResult.malformedForLength(1);
+ }
+ if (limit - pos < 1 + tail) {
+ return CoderResult.UNDERFLOW;
+ }
+
+ int nextByte;
+ for (int i = 0; i < tail; i++) {
+ nextByte = in.get() & 0xFF;
+ if ((nextByte & 0xC0) != 0x80) {
+ return CoderResult
+ .malformedForLength(1 + i);
+ }
+ jchar = (jchar << 6) + nextByte;
+ }
+ jchar -= remainingNumbers[tail];
+ if (jchar < lowerEncodingLimit[tail]) {
+ // Should have been encoded in a fewer octets
+ return CoderResult.malformedForLength(1);
+ }
+ pos += tail;
+ }
+ if (jchar <= 0xffff) {
+ out.put((char) jchar);
+ outRemaining--;
+ } else {
+ if (outRemaining < 2) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put((char) ((jchar >> 0xA) + 0xD7C0));
+ out.put((char) ((jchar & 0x3FF) + 0xDC00));
+ outRemaining -= 2;
+ }
+ pos++;
+ }
+ return CoderResult.UNDERFLOW;
+ } finally {
+ in.position(pos);
+ }
+ }
+
+ private CoderResult decodeHasArray(ByteBuffer in, CharBuffer out) {
+ int outRemaining = out.remaining();
+ int pos = in.position();
+ int limit = in.limit();
+ final byte[] bArr = in.array();
+ final char[] cArr = out.array();
+ final int inIndexLimit = limit + in.arrayOffset();
+
+ int inIndex = pos + in.arrayOffset();
+ int outIndex = out.position() + out.arrayOffset();
+
+ // if someone would change the limit in process,
+ // he would face consequences
+ for (; inIndex < inIndexLimit && outRemaining > 0; inIndex++) {
+ int jchar = bArr[inIndex];
+ if (jchar < 0) {
+ jchar = jchar & 0x7F;
+ int tail = remainingBytes[jchar];
+
+ if (tail == -1) {
+ in.position(inIndex - in.arrayOffset());
+ out.position(outIndex - out.arrayOffset());
+ return CoderResult.malformedForLength(1);
+ }
+ if (inIndexLimit - inIndex < 1 + tail) {
+ break;
+ }
+
+ for (int i = 0; i < tail; i++) {
+ int nextByte = bArr[inIndex + i + 1] & 0xFF;
+ if ((nextByte & 0xC0) != 0x80) {
+ in.position(inIndex - in.arrayOffset());
+ out.position(outIndex - out.arrayOffset());
+ return CoderResult.malformedForLength(1 + i);
+ }
+ jchar = (jchar << 6) + nextByte;
+ }
+ jchar -= remainingNumbers[tail];
+ if (jchar < lowerEncodingLimit[tail]) {
+ // Should have been encoded in fewer octets
+ in.position(inIndex - in.arrayOffset());
+ out.position(outIndex - out.arrayOffset());
+ return CoderResult.malformedForLength(1);
+ }
+ inIndex += tail;
+ }
+ // Note: This is the additional test added
+ if (jchar >= 0xD800 && jchar <=0xDFFF) {
+ return CoderResult.unmappableForLength(3);
+ }
+ if (jchar <= 0xffff) {
+ cArr[outIndex++] = (char) jchar;
+ outRemaining--;
+ } else {
+ if (outRemaining < 2) {
+ return CoderResult.OVERFLOW;
+ }
+ cArr[outIndex++] = (char) ((jchar >> 0xA) + 0xD7C0);
+ cArr[outIndex++] = (char) ((jchar & 0x3FF) + 0xDC00);
+ outRemaining -= 2;
+ }
+ }
+ in.position(inIndex - in.arrayOffset());
+ out.position(outIndex - out.arrayOffset());
+ return (outRemaining == 0 && inIndex < inIndexLimit) ?
+ CoderResult.OVERFLOW :
+ CoderResult.UNDERFLOW;
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]