Author: rjung Date: Sun Nov 13 16:14:02 2011 New Revision: 1201452 URL: http://svn.apache.org/viewvc?rev=1201452&view=rev Log: Clean up handling of multi-byte chars in the connectors. Backport of r1201069+r1201087 from trunk and r1201076+r1201088 from TC7, respectively, plus making the JK connector consistent with the newer ones.
Modified: tomcat/tc6.0.x/trunk/STATUS.txt tomcat/tc6.0.x/trunk/java/org/apache/coyote/ajp/AjpMessage.java tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalAprOutputBuffer.java tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalNioOutputBuffer.java tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalOutputBuffer.java tomcat/tc6.0.x/trunk/java/org/apache/jk/common/JkInputStream.java tomcat/tc6.0.x/trunk/java/org/apache/jk/common/MsgAjp.java tomcat/tc6.0.x/trunk/webapps/docs/changelog.xml Modified: tomcat/tc6.0.x/trunk/STATUS.txt URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/STATUS.txt?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/STATUS.txt (original) +++ tomcat/tc6.0.x/trunk/STATUS.txt Sun Nov 13 16:14:02 2011 @@ -95,13 +95,6 @@ PATCHES PROPOSED TO BACKPORT: -1: -* Clean up handling multi-byte chars in the connectors. - Backport of r1201069+1201087 from trunk resp. r1201076+r1201088 from TC7 - plus making the JK connector consistent with the Coyote one. - http://people.apache.org/~rjung/patches/connectors_multi-byte_handling_cleanup-v2.patch - +1: rjung, kkolinko, jfclere - -1: - PATCHES/ISSUES THAT ARE STALLED * Backport JSP unloading patch (BZ48358). Modified: tomcat/tc6.0.x/trunk/java/org/apache/coyote/ajp/AjpMessage.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/coyote/ajp/AjpMessage.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/coyote/ajp/AjpMessage.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/coyote/ajp/AjpMessage.java Sun Nov 13 16:14:02 2011 @@ -218,9 +218,7 @@ public class AjpMessage { // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. 
- if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } appendByte(c); @@ -253,9 +251,7 @@ public class AjpMessage { // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. - if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } appendByte(c); Modified: tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalAprOutputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalAprOutputBuffer.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalAprOutputBuffer.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalAprOutputBuffer.java Sun Nov 13 16:14:02 2011 @@ -634,9 +634,7 @@ public class InternalAprOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. - if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; @@ -681,9 +679,7 @@ public class InternalAprOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. 
- if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; Modified: tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalNioOutputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalNioOutputBuffer.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalNioOutputBuffer.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalNioOutputBuffer.java Sun Nov 13 16:14:02 2011 @@ -713,9 +713,7 @@ public class InternalNioOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. - if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; @@ -760,9 +758,7 @@ public class InternalNioOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. 
- if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; Modified: tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalOutputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalOutputBuffer.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalOutputBuffer.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/coyote/http11/InternalOutputBuffer.java Sun Nov 13 16:14:02 2011 @@ -679,9 +679,7 @@ public class InternalOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. - if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; @@ -726,9 +724,7 @@ public class InternalOutputBuffer // but is the only consistent approach within the current // servlet framework. It must suffice until servlet output // streams properly encode their output. 
- if ((c <= 31) && (c != 9)) { - c = ' '; - } else if (c == 127) { + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { c = ' '; } buf[pos++] = (byte) c; Modified: tomcat/tc6.0.x/trunk/java/org/apache/jk/common/JkInputStream.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jk/common/JkInputStream.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jk/common/JkInputStream.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jk/common/JkInputStream.java Sun Nov 13 16:14:02 2011 @@ -316,11 +316,9 @@ public class JkInputStream implements In MessageBytes hN=headers.getName(i); // no header to sc conversion - there's little benefit // on this direction - c2b.convert ( hN ); outputMsg.appendBytes( hN ); MessageBytes hV=headers.getValue(i); - c2b.convert( hV ); outputMsg.appendBytes( hV ); } mc.getSource().send( outputMsg, mc ); Modified: tomcat/tc6.0.x/trunk/java/org/apache/jk/common/MsgAjp.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jk/common/MsgAjp.java?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jk/common/MsgAjp.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jk/common/MsgAjp.java Sun Nov 13 16:14:02 2011 @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.jk.core.Msg; import org.apache.tomcat.util.buf.ByteChunk; +import org.apache.tomcat.util.buf.CharChunk; import org.apache.tomcat.util.buf.MessageBytes; /** @@ -149,10 +150,15 @@ public class MsgAjp extends Msg { appendByte(0); return; } - - // XXX Convert !! 
- ByteChunk bc= mb.getByteChunk(); - appendByteChunk(bc); + if (mb.getType() == MessageBytes.T_BYTES) { + ByteChunk bc = mb.getByteChunk(); + appendByteChunk(bc); + } else if (mb.getType() == MessageBytes.T_CHARS) { + CharChunk cc = mb.getCharChunk(); + appendCharChunk(cc); + } else { + appendString(mb.toString()); + } } public void appendByteChunk(ByteChunk bc) throws IOException { @@ -171,6 +177,66 @@ public class MsgAjp extends Msg { appendByte(0); } + /** + * Write a CharChunk out at the current write position. + * A null CharChunk is encoded as a string with length 0. + */ + private void appendCharChunk(CharChunk cc) { + if (cc == null) { + log.error("appendCharChunk() null"); + appendInt(0); + appendByte(0); + return; + } + int start = cc.getStart(); + int end = cc.getEnd(); + appendInt(end - start); + char[] cbuf = cc.getBuffer(); + for (int i = start; i < end; i++) { + char c = cbuf[i]; + // Note: This is clearly incorrect for many strings, + // but is the only consistent approach within the current + // servlet framework. It must suffice until servlet output + // streams properly encode their output. + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { + c = ' '; + } + appendByte((byte)c); + } + appendByte(0); + } + + /** + * Write a String out at the current write position. Strings are + * encoded with the length in two bytes first, then the string, and + * then a terminating \0 (which is <B>not</B> included in the + * encoded length). The terminator is for the convenience of the C + * code, where it saves a round of copying. A null string is + * encoded as a string with length 0. 
+ */ + private void appendString(String str) { + if (str == null) { + log.error("appendString() null"); + appendInt(0); + appendByte(0); + return; + } + int len = str.length(); + appendInt(len); + for (int i = 0; i < len; i++) { + char c = str.charAt (i); + // Note: This is clearly incorrect for many strings, + // but is the only consistent approach within the current + // servlet framework. It must suffice until servlet output + // streams properly encode their output. + if (((c <= 31) && (c != 9)) || c == 127 || c > 255) { + c = ' '; + } + appendByte((byte)c); + } + appendByte(0); + } + /** * Copy a chunk of bytes into the packet, starting at the current * write position. The chunk of bytes is encoded with the length Modified: tomcat/tc6.0.x/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/webapps/docs/changelog.xml?rev=1201452&r1=1201451&r2=1201452&view=diff ============================================================================== --- tomcat/tc6.0.x/trunk/webapps/docs/changelog.xml (original) +++ tomcat/tc6.0.x/trunk/webapps/docs/changelog.xml Sun Nov 13 16:14:02 2011 @@ -175,6 +175,9 @@ Allow the BIO HTTP connector to be used with SSL when running under Java 7. (markt) </fix> + <fix> + Improve multi-byte character handling in all connectors. (rjung) + </fix> </changelog> </subsection> <subsection name="Jasper"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org