Author: markt Date: Tue Mar 5 14:04:57 2013 New Revision: 1452797 URL: http://svn.apache.org/r1452797 Log: Switch to Tomcat's internal UTF-8 decoder for URIs and request bodies. Improve handling of trailing invalid byte sequences. Replace rather than fail for invalid URIs.
Added: tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java - copied, changed from r1452501, tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java Modified: tomcat/tc7.0.x/trunk/ (props changed) tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Propchange: tomcat/tc7.0.x/trunk/ ------------------------------------------------------------------------------ Merged /tomcat/trunk:r1451434,1451938-1451939,1451947,1451955-1451956,1452295,1452501,1452707,1452719,1452721 Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java Tue Mar 5 14:04:57 2013 @@ -23,6 +23,7 @@ import java.util.EnumSet; import javax.servlet.RequestDispatcher; import javax.servlet.SessionTrackingMode; +import javax.servlet.http.HttpServletResponse; import org.apache.catalina.Context; import org.apache.catalina.Host; @@ -980,30 +981,30 @@ public class CoyoteAdapter implements Ad B2CConverter conv = request.getURIConverter(); try { if (conv == null) { - conv = new B2CConverter(enc); + conv = new B2CConverter(enc, true); request.setURIConverter(conv); } else { conv.recycle(); } } catch (IOException e) { - // Ignore log.error("Invalid URI encoding; using HTTP default"); connector.setURIEncoding(null); } if (conv != null) { try { - conv.convert(bc, cc); - uri.setChars(cc.getBuffer(), cc.getStart(), - cc.getLength()); + conv.convert(bc, cc, true); + uri.setChars(cc.getBuffer(), cc.getStart(), cc.getLength()); return; - } catch (IOException e) { - log.error("Invalid URI character encoding; trying ascii"); - cc.recycle(); + } catch (IOException ioe) { + // Should never happen as B2CConverter should replace + // problematic characters + request.getResponse().sendError( + HttpServletResponse.SC_BAD_REQUEST); } } } - // Default encoding: fast conversion + // Default encoding: fast conversion for ISO-8859-1 byte[] bbuf = bc.getBuffer(); char[] cbuf = cc.getBuffer(); int start = bc.getStart(); @@ -1011,7 +1012,6 @@ public class CoyoteAdapter implements Ad cbuf[i] = (char) (bbuf[i + start] & 0xff); } uri.setChars(cbuf, 0, length); - } Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java Tue Mar 5 14:04:57 2013 @@ -347,10 +347,12 @@ public class InputBuffer extends Reader setConverter(); } + boolean eof = false; + if (bb.getLength() <= 0) { int nRead = realReadBytes(bb.getBytes(), 0, bb.getBytes().length); if (nRead < 0) { - return -1; + eof = true; } } @@ -369,10 +371,13 @@ public class InputBuffer extends Reader } state = CHAR_STATE; - conv.convert(bb, cb); - - return cb.getLength(); + conv.convert(bb, cb, eof); + if (cb.getLength() == 0 && eof) { + return -1; + } else { + return cb.getLength(); + } } Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java Tue Mar 5 14:04:57 2013 @@ -23,6 +23,7 @@ import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -103,9 +104,30 @@ public class B2CConverter { private final ByteBuffer leftovers; public B2CConverter(String encoding) throws IOException { + this(encoding, false); + } + + public B2CConverter(String encoding, boolean replaceOnError) + throws IOException { byte[] left = new byte[LEFTOVER_SIZE]; leftovers = ByteBuffer.wrap(left); - decoder = getCharset(encoding).newDecoder(); + CodingErrorAction action; + if (replaceOnError) { + action = CodingErrorAction.REPLACE; + } else { + action = CodingErrorAction.REPORT; + } + Charset charset = getCharset(encoding); + // Special case. Use the Apache Harmony based UTF-8 decoder because it + // - a) rejects invalid sequences that the JVM decoder does not + // - b) fails faster for some invalid sequences + if (charset.equals(UTF_8)) { + decoder = new Utf8Decoder(); + } else { + decoder = charset.newDecoder(); + } + decoder.onMalformedInput(action); + decoder.onUnmappableCharacter(action); } /** @@ -116,17 +138,14 @@ public class B2CConverter { leftovers.position(0); } - public boolean isUndeflow() { - return (leftovers.position() > 0); - } - /** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output + * @param endOfInput Is this all of the available data */ - public void convert(ByteChunk bc, CharChunk cc) + public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed @@ -153,7 +172,7 @@ public class B2CConverter { do { leftovers.put(bc.substractB()); leftovers.flip(); - result = decoder.decode(leftovers, cb, false); + result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); @@ -165,7 +184,7 @@ public class B2CConverter { } // Do the decoding and get the results into the byte chunk and the char // chunk - result = decoder.decode(bb, cb, false); + result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { Modified: tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java Tue Mar 5 14:04:57 2013 @@ -25,9 +25,7 @@ import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - +import org.junit.Assert; import org.junit.Test; import org.apache.catalina.Context; @@ -89,7 +87,7 @@ public class TestCoyoteAdapter extends T File foo = new File(docBase, "foo"); addDeleteOnTearDown(foo); if (!foo.mkdirs() && !foo.isDirectory()) { - fail("Unable to create foo directory in docBase"); + Assert.fail("Unable to create foo directory in docBase"); } Context ctx = tomcat.addContext("", docBase.getAbsolutePath()); @@ -122,12 +120,12 @@ public class TestCoyoteAdapter extends T tomcat.start(); ByteChunk res = getUrl("http://localhost:" + getPort() + path); - assertEquals(expected, res.toString()); + Assert.assertEquals(expected, res.toString()); } private void testPath(String path, String expected) throws Exception { ByteChunk res = getUrl("http://localhost:" + getPort() + path); - assertEquals(expected, res.toString()); + Assert.assertEquals(expected, res.toString()); } private static class PathParamServlet extends HttpServlet { @@ -176,6 +174,80 @@ public class TestCoyoteAdapter extends T tomcat.start(); ByteChunk res = getUrl("http://localhost:" + getPort() + path); - assertEquals(expected, res.toString()); + Assert.assertEquals(expected, res.toString()); + } + + @Test + public void testBug54602a() throws Exception { + // No UTF-8 + doTestUriDecoding("/foo", "UTF-8", "/foo"); + } + + @Test + public void testBug54602b() throws Exception { + // Valid UTF-8 + doTestUriDecoding("/foo%c4%87", "UTF-8", "/foo\u0107"); + } + + @Test + public void testBug54602c() throws Exception { + // Partial UTF-8 + doTestUriDecoding("/foo%c4", "UTF-8", "/foo\uFFFD"); + } + + @Test + public void testBug54602d() throws Exception { + // Invalid UTF-8 + doTestUriDecoding("/foo%ff", "UTF-8", "/foo\uFFFD"); + } + + @Test + public void testBug54602e() throws Exception { + // Invalid UTF-8 + doTestUriDecoding("/foo%ed%a0%80", "UTF-8", "/foo\uFFFD\uFFFD\uFFFD"); + } + + private void doTestUriDecoding(String path, String encoding, + String expectedPathInfo) throws Exception{ + + // Setup Tomcat instance + Tomcat tomcat = getTomcatInstance(); + + tomcat.getConnector().setURIEncoding(encoding); + + // Must have a real docBase - just use temp + Context ctx = + tomcat.addContext("/", System.getProperty("java.io.tmpdir")); + + PathInfoServlet servlet = new PathInfoServlet(); + Tomcat.addServlet(ctx, "servlet", servlet); + ctx.addServletMapping("/*", "servlet"); + + tomcat.start(); + + int rc = getUrl("http://localhost:" + getPort() + path, + new ByteChunk(), null); + Assert.assertEquals(HttpServletResponse.SC_OK, rc); + + Assert.assertEquals(expectedPathInfo, servlet.getPathInfo()); + } + + private static class PathInfoServlet extends HttpServlet { + + private static final long serialVersionUID = 1L; + + private String pathInfo = null; + + public String getPathInfo() { + return pathInfo; + } + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + + // Not thread safe + pathInfo = req.getPathInfo(); + } } } Copied: tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java (from r1452501, tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java) URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java?p2=tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&p1=tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&r1=1452501&r2=1452797&rev=1452797&view=diff ============================================================================== --- tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java Tue Mar 5 14:04:57 2013 @@ -33,8 +33,8 @@ import org.apache.catalina.startup.Tomca import org.apache.catalina.startup.TomcatBaseTest; import org.apache.tomcat.util.buf.B2CConverter; import org.apache.tomcat.util.buf.ByteChunk; -import org.apache.tomcat.util.buf.TestUtf8Extended; -import org.apache.tomcat.util.buf.TestUtf8Extended.Utf8TestCase; +import org.apache.tomcat.util.buf.TestUtf8; +import org.apache.tomcat.util.buf.TestUtf8.Utf8TestCase; public class TestInputBuffer extends TomcatBaseTest { @@ -48,7 +48,7 @@ public class TestInputBuffer extends Tom tomcat.getConnector().setProperty("soTimeout", "300000"); tomcat.start(); - for (Utf8TestCase testCase : TestUtf8Extended.TEST_CASES) { + for (Utf8TestCase testCase : TestUtf8.TEST_CASES) { String expected = null; if (testCase.invalidIndex == -1) { expected = testCase.outputReplaced; @@ -61,8 +61,6 @@ public class TestInputBuffer extends Tom private void doUtf8BodyTest(String description, int[] input, String expected) throws Exception { - System.out.println(description); - byte[] bytes = new byte[input.length]; for (int i = 0; i < input.length; i++) { bytes[i] = (byte) input[i]; Modified: tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java Tue Mar 5 14:04:57 2013 @@ -428,7 +428,7 @@ public class TestWebSocket extends Tomca bc.setEnd(len); B2CConverter b2c = new B2CConverter("UTF-8"); - b2c.convert(bc, cc); + b2c.convert(bc, cc, true); return cc.toString(); } Modified: tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java Tue Mar 5 14:04:57 2013 @@ -17,6 +17,7 @@ package org.apache.tomcat.util.buf; import java.nio.charset.Charset; +import java.nio.charset.MalformedInputException; import org.junit.Assert; import org.junit.Test; @@ -26,6 +27,10 @@ public class TestB2CConverter { private static final byte[] UTF16_MESSAGE = new byte[] {-2, -1, 0, 65, 0, 66, 0, 67}; + private static final byte[] UTF8_INVALID = new byte[] {-8, -69, -73, -77}; + + private static final byte[] UTF8_PARTIAL = new byte[] {-50}; + @Test public void testSingleMessage() throws Exception { testMessages(1); @@ -45,12 +50,12 @@ public class TestB2CConverter { B2CConverter conv = new B2CConverter("UTF-16"); ByteChunk bc = new ByteChunk(); - CharChunk cc = new CharChunk(); + CharChunk cc = new CharChunk(32); for (int i = 0; i < msgCount; i++) { bc.append(UTF16_MESSAGE, 0, UTF16_MESSAGE.length); - conv.convert(bc, cc); + conv.convert(bc, cc, true); Assert.assertEquals("ABC", cc.toString()); bc.recycle(); cc.recycle(); @@ -83,4 +88,52 @@ public class TestB2CConverter { Assert.assertTrue("Limit needs to be at least " + maxLeftover, maxLeftover <= B2CConverter.LEFTOVER_SIZE); } + + // TODO Work-around bug in UTF8 decoder + //@Test(expected=MalformedInputException.class) + public void testBug54602a() throws Exception { + // Check invalid input is rejected straight away + B2CConverter conv = new B2CConverter("UTF-8"); + ByteChunk bc = new ByteChunk(); + CharChunk cc = new CharChunk(); + + bc.append(UTF8_INVALID, 0, UTF8_INVALID.length); + cc.allocate(bc.getLength(), -1); + + conv.convert(bc, cc, false); + } + + @Test(expected=MalformedInputException.class) + public void testBug54602b() throws Exception { + // Check partial input is rejected + B2CConverter conv = new B2CConverter("UTF-8"); + ByteChunk bc = new ByteChunk(); + CharChunk cc = new CharChunk(); + + bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length); + cc.allocate(bc.getLength(), -1); + + conv.convert(bc, cc, true); + } + + @Test + public void testBug54602c() throws Exception { + // Check partial input is rejected once it is known to be all available + B2CConverter conv = new B2CConverter("UTF-8"); + ByteChunk bc = new ByteChunk(); + CharChunk cc = new CharChunk(); + + bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length); + cc.allocate(bc.getLength(), -1); + + conv.convert(bc, cc, false); + + Exception e = null; + try { + conv.convert(bc, cc, true); + } catch (MalformedInputException mie) { + e = mie; + } + Assert.assertNotNull(e); + } } Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1452797&r1=1452796&r2=1452797&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original) +++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Tue Mar 5 14:04:57 2013 @@ -96,6 +96,18 @@ between requests to ensure an error in one request does not trigger a failure in the next request. (markt) </fix> + <fix> + Use the newly added improved UTF-8 decoder for decoding UTF-8 encoded + URIs and UTF-8 encoded request bodies. Invalid UTF-8 URIs will not + cause an error but will make use of the replacement character when an + error is detected. This will allow web applications to handle the URI + which will most likely result in a 404 response. The fall-back to + decoding with ISO-8859-1 if UTF-8 decoding fails has been removed. + Invalid UTF-8 sequences in a request body will trigger an IOException. + The way the decoder is used has also been improved. The notable change + is that invalid sequences at the end of the input now trigger an error + rather than being silebtly swallowed. (markt) + </fix> </changelog> </subsection> <subsection name="Coyote"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org