Author: markt
Date: Tue Mar 5 14:04:57 2013
New Revision: 1452797
URL: http://svn.apache.org/r1452797
Log:
Switch to Tomcat's internal UTF-8 decoder for URIs and request bodies.
Improve handling of trailing invalid byte sequences.
Replace rather than fail for invalid URIs.
Added:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
- copied, changed from r1452501,
tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
Modified:
tomcat/tc7.0.x/trunk/ (props changed)
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
Propchange: tomcat/tc7.0.x/trunk/
------------------------------------------------------------------------------
Merged
/tomcat/trunk:r1451434,1451938-1451939,1451947,1451955-1451956,1452295,1452501,1452707,1452719,1452721
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
Tue Mar 5 14:04:57 2013
@@ -23,6 +23,7 @@ import java.util.EnumSet;
import javax.servlet.RequestDispatcher;
import javax.servlet.SessionTrackingMode;
+import javax.servlet.http.HttpServletResponse;
import org.apache.catalina.Context;
import org.apache.catalina.Host;
@@ -980,30 +981,30 @@ public class CoyoteAdapter implements Ad
B2CConverter conv = request.getURIConverter();
try {
if (conv == null) {
- conv = new B2CConverter(enc);
+ conv = new B2CConverter(enc, true);
request.setURIConverter(conv);
} else {
conv.recycle();
}
} catch (IOException e) {
- // Ignore
log.error("Invalid URI encoding; using HTTP default");
connector.setURIEncoding(null);
}
if (conv != null) {
try {
- conv.convert(bc, cc);
- uri.setChars(cc.getBuffer(), cc.getStart(),
- cc.getLength());
+ conv.convert(bc, cc, true);
+ uri.setChars(cc.getBuffer(), cc.getStart(),
cc.getLength());
return;
- } catch (IOException e) {
- log.error("Invalid URI character encoding; trying ascii");
- cc.recycle();
+ } catch (IOException ioe) {
+ // Should never happen as B2CConverter should replace
+ // problematic characters
+ request.getResponse().sendError(
+ HttpServletResponse.SC_BAD_REQUEST);
}
}
}
- // Default encoding: fast conversion
+ // Default encoding: fast conversion for ISO-8859-1
byte[] bbuf = bc.getBuffer();
char[] cbuf = cc.getBuffer();
int start = bc.getStart();
@@ -1011,7 +1012,6 @@ public class CoyoteAdapter implements Ad
cbuf[i] = (char) (bbuf[i + start] & 0xff);
}
uri.setChars(cbuf, 0, length);
-
}
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
Tue Mar 5 14:04:57 2013
@@ -347,10 +347,12 @@ public class InputBuffer extends Reader
setConverter();
}
+ boolean eof = false;
+
if (bb.getLength() <= 0) {
int nRead = realReadBytes(bb.getBytes(), 0, bb.getBytes().length);
if (nRead < 0) {
- return -1;
+ eof = true;
}
}
@@ -369,10 +371,13 @@ public class InputBuffer extends Reader
}
state = CHAR_STATE;
- conv.convert(bb, cb);
-
- return cb.getLength();
+ conv.convert(bb, cb, eof);
+ if (cb.getLength() == 0 && eof) {
+ return -1;
+ } else {
+ return cb.getLength();
+ }
}
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java Tue
Mar 5 14:04:57 2013
@@ -23,6 +23,7 @@ import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -103,9 +104,30 @@ public class B2CConverter {
private final ByteBuffer leftovers;
public B2CConverter(String encoding) throws IOException {
+ this(encoding, false);
+ }
+
+ public B2CConverter(String encoding, boolean replaceOnError)
+ throws IOException {
byte[] left = new byte[LEFTOVER_SIZE];
leftovers = ByteBuffer.wrap(left);
- decoder = getCharset(encoding).newDecoder();
+ CodingErrorAction action;
+ if (replaceOnError) {
+ action = CodingErrorAction.REPLACE;
+ } else {
+ action = CodingErrorAction.REPORT;
+ }
+ Charset charset = getCharset(encoding);
+ // Special case. Use the Apache Harmony based UTF-8 decoder because it
+ // - a) rejects invalid sequences that the JVM decoder does not
+ // - b) fails faster for some invalid sequences
+ if (charset.equals(UTF_8)) {
+ decoder = new Utf8Decoder();
+ } else {
+ decoder = charset.newDecoder();
+ }
+ decoder.onMalformedInput(action);
+ decoder.onUnmappableCharacter(action);
}
/**
@@ -116,17 +138,14 @@ public class B2CConverter {
leftovers.position(0);
}
- public boolean isUndeflow() {
- return (leftovers.position() > 0);
- }
-
/**
* Convert the given bytes to characters.
*
* @param bc byte input
* @param cc char output
+ * @param endOfInput Is this all of the available data
*/
- public void convert(ByteChunk bc, CharChunk cc)
+ public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
throws IOException {
if ((bb == null) || (bb.array() != bc.getBuffer())) {
// Create a new byte buffer if anything changed
@@ -153,7 +172,7 @@ public class B2CConverter {
do {
leftovers.put(bc.substractB());
leftovers.flip();
- result = decoder.decode(leftovers, cb, false);
+ result = decoder.decode(leftovers, cb, endOfInput);
leftovers.position(leftovers.limit());
leftovers.limit(leftovers.array().length);
} while (result.isUnderflow() && (cb.position() == pos));
@@ -165,7 +184,7 @@ public class B2CConverter {
}
// Do the decoding and get the results into the byte chunk and the char
// chunk
- result = decoder.decode(bb, cb, false);
+ result = decoder.decode(bb, cb, endOfInput);
if (result.isError() || result.isMalformed()) {
result.throwException();
} else if (result.isOverflow()) {
Modified:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
---
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
(original)
+++
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
Tue Mar 5 14:04:57 2013
@@ -25,9 +25,7 @@ import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
+import org.junit.Assert;
import org.junit.Test;
import org.apache.catalina.Context;
@@ -89,7 +87,7 @@ public class TestCoyoteAdapter extends T
File foo = new File(docBase, "foo");
addDeleteOnTearDown(foo);
if (!foo.mkdirs() && !foo.isDirectory()) {
- fail("Unable to create foo directory in docBase");
+ Assert.fail("Unable to create foo directory in docBase");
}
Context ctx = tomcat.addContext("", docBase.getAbsolutePath());
@@ -122,12 +120,12 @@ public class TestCoyoteAdapter extends T
tomcat.start();
ByteChunk res = getUrl("http://localhost:" + getPort() + path);
- assertEquals(expected, res.toString());
+ Assert.assertEquals(expected, res.toString());
}
private void testPath(String path, String expected) throws Exception {
ByteChunk res = getUrl("http://localhost:" + getPort() + path);
- assertEquals(expected, res.toString());
+ Assert.assertEquals(expected, res.toString());
}
private static class PathParamServlet extends HttpServlet {
@@ -176,6 +174,80 @@ public class TestCoyoteAdapter extends T
tomcat.start();
ByteChunk res = getUrl("http://localhost:" + getPort() + path);
- assertEquals(expected, res.toString());
+ Assert.assertEquals(expected, res.toString());
+ }
+
+ @Test
+ public void testBug54602a() throws Exception {
+ // No UTF-8
+ doTestUriDecoding("/foo", "UTF-8", "/foo");
+ }
+
+ @Test
+ public void testBug54602b() throws Exception {
+ // Valid UTF-8
+ doTestUriDecoding("/foo%c4%87", "UTF-8", "/foo\u0107");
+ }
+
+ @Test
+ public void testBug54602c() throws Exception {
+ // Partial UTF-8
+ doTestUriDecoding("/foo%c4", "UTF-8", "/foo\uFFFD");
+ }
+
+ @Test
+ public void testBug54602d() throws Exception {
+ // Invalid UTF-8
+ doTestUriDecoding("/foo%ff", "UTF-8", "/foo\uFFFD");
+ }
+
+ @Test
+ public void testBug54602e() throws Exception {
+ // Invalid UTF-8
+ doTestUriDecoding("/foo%ed%a0%80", "UTF-8", "/foo\uFFFD\uFFFD\uFFFD");
+ }
+
+ private void doTestUriDecoding(String path, String encoding,
+ String expectedPathInfo) throws Exception{
+
+ // Setup Tomcat instance
+ Tomcat tomcat = getTomcatInstance();
+
+ tomcat.getConnector().setURIEncoding(encoding);
+
+ // Must have a real docBase - just use temp
+ Context ctx =
+ tomcat.addContext("/", System.getProperty("java.io.tmpdir"));
+
+ PathInfoServlet servlet = new PathInfoServlet();
+ Tomcat.addServlet(ctx, "servlet", servlet);
+ ctx.addServletMapping("/*", "servlet");
+
+ tomcat.start();
+
+ int rc = getUrl("http://localhost:" + getPort() + path,
+ new ByteChunk(), null);
+ Assert.assertEquals(HttpServletResponse.SC_OK, rc);
+
+ Assert.assertEquals(expectedPathInfo, servlet.getPathInfo());
+ }
+
+ private static class PathInfoServlet extends HttpServlet {
+
+ private static final long serialVersionUID = 1L;
+
+ private String pathInfo = null;
+
+ public String getPathInfo() {
+ return pathInfo;
+ }
+
+ @Override
+ protected void doGet(HttpServletRequest req, HttpServletResponse resp)
+ throws ServletException, IOException {
+
+ // Not thread safe
+ pathInfo = req.getPathInfo();
+ }
}
}
Copied:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
(from r1452501,
tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java)
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java?p2=tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&p1=tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&r1=1452501&r2=1452797&rev=1452797&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
(original)
+++
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
Tue Mar 5 14:04:57 2013
@@ -33,8 +33,8 @@ import org.apache.catalina.startup.Tomca
import org.apache.catalina.startup.TomcatBaseTest;
import org.apache.tomcat.util.buf.B2CConverter;
import org.apache.tomcat.util.buf.ByteChunk;
-import org.apache.tomcat.util.buf.TestUtf8Extended;
-import org.apache.tomcat.util.buf.TestUtf8Extended.Utf8TestCase;
+import org.apache.tomcat.util.buf.TestUtf8;
+import org.apache.tomcat.util.buf.TestUtf8.Utf8TestCase;
public class TestInputBuffer extends TomcatBaseTest {
@@ -48,7 +48,7 @@ public class TestInputBuffer extends Tom
tomcat.getConnector().setProperty("soTimeout", "300000");
tomcat.start();
- for (Utf8TestCase testCase : TestUtf8Extended.TEST_CASES) {
+ for (Utf8TestCase testCase : TestUtf8.TEST_CASES) {
String expected = null;
if (testCase.invalidIndex == -1) {
expected = testCase.outputReplaced;
@@ -61,8 +61,6 @@ public class TestInputBuffer extends Tom
private void doUtf8BodyTest(String description, int[] input,
String expected) throws Exception {
- System.out.println(description);
-
byte[] bytes = new byte[input.length];
for (int i = 0; i < input.length; i++) {
bytes[i] = (byte) input[i];
Modified:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
Tue Mar 5 14:04:57 2013
@@ -428,7 +428,7 @@ public class TestWebSocket extends Tomca
bc.setEnd(len);
B2CConverter b2c = new B2CConverter("UTF-8");
- b2c.convert(bc, cc);
+ b2c.convert(bc, cc, true);
return cc.toString();
}
Modified:
tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
Tue Mar 5 14:04:57 2013
@@ -17,6 +17,7 @@
package org.apache.tomcat.util.buf;
import java.nio.charset.Charset;
+import java.nio.charset.MalformedInputException;
import org.junit.Assert;
import org.junit.Test;
@@ -26,6 +27,10 @@ public class TestB2CConverter {
private static final byte[] UTF16_MESSAGE =
new byte[] {-2, -1, 0, 65, 0, 66, 0, 67};
+ private static final byte[] UTF8_INVALID = new byte[] {-8, -69, -73, -77};
+
+ private static final byte[] UTF8_PARTIAL = new byte[] {-50};
+
@Test
public void testSingleMessage() throws Exception {
testMessages(1);
@@ -45,12 +50,12 @@ public class TestB2CConverter {
B2CConverter conv = new B2CConverter("UTF-16");
ByteChunk bc = new ByteChunk();
- CharChunk cc = new CharChunk();
+ CharChunk cc = new CharChunk(32);
for (int i = 0; i < msgCount; i++) {
bc.append(UTF16_MESSAGE, 0, UTF16_MESSAGE.length);
- conv.convert(bc, cc);
+ conv.convert(bc, cc, true);
Assert.assertEquals("ABC", cc.toString());
bc.recycle();
cc.recycle();
@@ -83,4 +88,52 @@ public class TestB2CConverter {
Assert.assertTrue("Limit needs to be at least " + maxLeftover,
maxLeftover <= B2CConverter.LEFTOVER_SIZE);
}
+
+ // TODO Work-around bug in UTF8 decoder
+ //@Test(expected=MalformedInputException.class)
+ public void testBug54602a() throws Exception {
+ // Check invalid input is rejected straight away
+ B2CConverter conv = new B2CConverter("UTF-8");
+ ByteChunk bc = new ByteChunk();
+ CharChunk cc = new CharChunk();
+
+ bc.append(UTF8_INVALID, 0, UTF8_INVALID.length);
+ cc.allocate(bc.getLength(), -1);
+
+ conv.convert(bc, cc, false);
+ }
+
+ @Test(expected=MalformedInputException.class)
+ public void testBug54602b() throws Exception {
+ // Check partial input is rejected
+ B2CConverter conv = new B2CConverter("UTF-8");
+ ByteChunk bc = new ByteChunk();
+ CharChunk cc = new CharChunk();
+
+ bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length);
+ cc.allocate(bc.getLength(), -1);
+
+ conv.convert(bc, cc, true);
+ }
+
+ @Test
+ public void testBug54602c() throws Exception {
+ // Check partial input is rejected once it is known to be all available
+ B2CConverter conv = new B2CConverter("UTF-8");
+ ByteChunk bc = new ByteChunk();
+ CharChunk cc = new CharChunk();
+
+ bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length);
+ cc.allocate(bc.getLength(), -1);
+
+ conv.convert(bc, cc, false);
+
+ Exception e = null;
+ try {
+ conv.convert(bc, cc, true);
+ } catch (MalformedInputException mie) {
+ e = mie;
+ }
+ Assert.assertNotNull(e);
+ }
}
Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Tue Mar 5 14:04:57 2013
@@ -96,6 +96,18 @@
between requests to ensure an error in one request does not trigger a
failure in the next request. (markt)
</fix>
+ <fix>
+ Use the newly added improved UTF-8 decoder for decoding UTF-8 encoded
+ URIs and UTF-8 encoded request bodies. Invalid UTF-8 URIs will not
+ cause an error but will make use of the replacement character when an
+ error is detected. This will allow web applications to handle the URI
+ which will most likely result in a 404 response. The fall-back to
+ decoding with ISO-8859-1 if UTF-8 decoding fails has been removed.
+ Invalid UTF-8 sequences in a request body will trigger an IOException.
+ The way the decoder is used has also been improved. The notable change
+ is that invalid sequences at the end of the input now trigger an error
+ rather than being silebtly swallowed. (markt)
+ </fix>
</changelog>
</subsection>
<subsection name="Coyote">
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]