Author: markt
Date: Tue Mar  5 14:04:57 2013
New Revision: 1452797

URL: http://svn.apache.org/r1452797
Log:
Switch to Tomcat's internal UTF-8 decoder for URIs and request bodies.
Improve handling of trailing invalid byte sequences.
Replace rather than fail for invalid URIs.

Added:
    tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
      - copied, changed from r1452501, 
tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java
Modified:
    tomcat/tc7.0.x/trunk/   (props changed)
    tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
    tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
    tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
    
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
    tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
    tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
    tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml

Propchange: tomcat/tc7.0.x/trunk/
------------------------------------------------------------------------------
  Merged 
/tomcat/trunk:r1451434,1451938-1451939,1451947,1451955-1451956,1452295,1452501,1452707,1452719,1452721

Modified: 
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java 
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java 
Tue Mar  5 14:04:57 2013
@@ -23,6 +23,7 @@ import java.util.EnumSet;
 
 import javax.servlet.RequestDispatcher;
 import javax.servlet.SessionTrackingMode;
+import javax.servlet.http.HttpServletResponse;
 
 import org.apache.catalina.Context;
 import org.apache.catalina.Host;
@@ -980,30 +981,30 @@ public class CoyoteAdapter implements Ad
             B2CConverter conv = request.getURIConverter();
             try {
                 if (conv == null) {
-                    conv = new B2CConverter(enc);
+                    conv = new B2CConverter(enc, true);
                     request.setURIConverter(conv);
                 } else {
                     conv.recycle();
                 }
             } catch (IOException e) {
-                // Ignore
                 log.error("Invalid URI encoding; using HTTP default");
                 connector.setURIEncoding(null);
             }
             if (conv != null) {
                 try {
-                    conv.convert(bc, cc);
-                    uri.setChars(cc.getBuffer(), cc.getStart(),
-                                 cc.getLength());
+                    conv.convert(bc, cc, true);
+                    uri.setChars(cc.getBuffer(), cc.getStart(), 
cc.getLength());
                     return;
-                } catch (IOException e) {
-                    log.error("Invalid URI character encoding; trying ascii");
-                    cc.recycle();
+                } catch (IOException ioe) {
+                    // Should never happen as B2CConverter should replace
+                    // problematic characters
+                    request.getResponse().sendError(
+                            HttpServletResponse.SC_BAD_REQUEST);
                 }
             }
         }
 
-        // Default encoding: fast conversion
+        // Default encoding: fast conversion for ISO-8859-1
         byte[] bbuf = bc.getBuffer();
         char[] cbuf = cc.getBuffer();
         int start = bc.getStart();
@@ -1011,7 +1012,6 @@ public class CoyoteAdapter implements Ad
             cbuf[i] = (char) (bbuf[i + start] & 0xff);
         }
         uri.setChars(cbuf, 0, length);
-
     }
 
 

Modified: 
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java 
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java 
Tue Mar  5 14:04:57 2013
@@ -347,10 +347,12 @@ public class InputBuffer extends Reader
             setConverter();
         }
 
+        boolean eof = false;
+
         if (bb.getLength() <= 0) {
             int nRead = realReadBytes(bb.getBytes(), 0, bb.getBytes().length);
             if (nRead < 0) {
-                return -1;
+                eof = true;
             }
         }
 
@@ -369,10 +371,13 @@ public class InputBuffer extends Reader
         }
 
         state = CHAR_STATE;
-        conv.convert(bb, cb);
-
-        return cb.getLength();
+        conv.convert(bb, cb, eof);
 
+        if (cb.getLength() == 0 && eof) {
+            return -1;
+        } else {
+            return cb.getLength();
+        }
     }
 
 

Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java 
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java Tue 
Mar  5 14:04:57 2013
@@ -23,6 +23,7 @@ import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -103,9 +104,30 @@ public class B2CConverter {
     private final ByteBuffer leftovers;
 
     public B2CConverter(String encoding) throws IOException {
+        this(encoding, false);
+    }
+
+    public B2CConverter(String encoding, boolean replaceOnError)
+            throws IOException {
         byte[] left = new byte[LEFTOVER_SIZE];
         leftovers = ByteBuffer.wrap(left);
-        decoder = getCharset(encoding).newDecoder();
+        CodingErrorAction action;
+        if (replaceOnError) {
+            action = CodingErrorAction.REPLACE;
+        } else {
+            action = CodingErrorAction.REPORT;
+        }
+        Charset charset = getCharset(encoding);
+        // Special case. Use the Apache Harmony based UTF-8 decoder because it
+        // - a) rejects invalid sequences that the JVM decoder does not
+        // - b) fails faster for some invalid sequences
+        if (charset.equals(UTF_8)) {
+            decoder = new Utf8Decoder();
+        } else {
+            decoder = charset.newDecoder();
+        }
+        decoder.onMalformedInput(action);
+        decoder.onUnmappableCharacter(action);
     }
 
     /** 
@@ -116,17 +138,14 @@ public class B2CConverter {
         leftovers.position(0);
     }
 
-    public boolean isUndeflow() {
-        return (leftovers.position() > 0);
-    }
-
     /**
      * Convert the given bytes to characters.
      * 
      * @param bc byte input
      * @param cc char output
+     * @param endOfInput    Is this all of the available data
      */
-    public void convert(ByteChunk bc, CharChunk cc) 
+    public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
             throws IOException {
         if ((bb == null) || (bb.array() != bc.getBuffer())) {
             // Create a new byte buffer if anything changed
@@ -153,7 +172,7 @@ public class B2CConverter {
             do {
                 leftovers.put(bc.substractB());
                 leftovers.flip();
-                result = decoder.decode(leftovers, cb, false);
+                result = decoder.decode(leftovers, cb, endOfInput);
                 leftovers.position(leftovers.limit());
                 leftovers.limit(leftovers.array().length);
             } while (result.isUnderflow() && (cb.position() == pos));
@@ -165,7 +184,7 @@ public class B2CConverter {
         }
         // Do the decoding and get the results into the byte chunk and the char
         // chunk
-        result = decoder.decode(bb, cb, false);
+        result = decoder.decode(bb, cb, endOfInput);
         if (result.isError() || result.isMalformed()) {
             result.throwException();
         } else if (result.isOverflow()) {

Modified: 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java 
(original)
+++ 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestCoyoteAdapter.java 
Tue Mar  5 14:04:57 2013
@@ -25,9 +25,7 @@ import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
+import org.junit.Assert;
 import org.junit.Test;
 
 import org.apache.catalina.Context;
@@ -89,7 +87,7 @@ public class TestCoyoteAdapter extends T
         File foo = new File(docBase, "foo");
         addDeleteOnTearDown(foo);
         if (!foo.mkdirs() && !foo.isDirectory()) {
-            fail("Unable to create foo directory in docBase");
+            Assert.fail("Unable to create foo directory in docBase");
         }
 
         Context ctx = tomcat.addContext("", docBase.getAbsolutePath());
@@ -122,12 +120,12 @@ public class TestCoyoteAdapter extends T
         tomcat.start();
 
         ByteChunk res = getUrl("http://localhost:"; + getPort() + path);
-        assertEquals(expected, res.toString());
+        Assert.assertEquals(expected, res.toString());
     }
 
     private void testPath(String path, String expected) throws Exception {
         ByteChunk res = getUrl("http://localhost:"; + getPort() + path);
-        assertEquals(expected, res.toString());
+        Assert.assertEquals(expected, res.toString());
     }
 
     private static class PathParamServlet extends HttpServlet {
@@ -176,6 +174,80 @@ public class TestCoyoteAdapter extends T
         tomcat.start();
 
         ByteChunk res = getUrl("http://localhost:"; + getPort() + path);
-        assertEquals(expected, res.toString());
+        Assert.assertEquals(expected, res.toString());
+    }
+
+    @Test
+    public void testBug54602a() throws Exception {
+        // No UTF-8
+        doTestUriDecoding("/foo", "UTF-8", "/foo");
+    }
+
+    @Test
+    public void testBug54602b() throws Exception {
+        // Valid UTF-8
+        doTestUriDecoding("/foo%c4%87", "UTF-8", "/foo\u0107");
+    }
+
+    @Test
+    public void testBug54602c() throws Exception {
+        // Partial UTF-8
+        doTestUriDecoding("/foo%c4", "UTF-8", "/foo\uFFFD");
+    }
+
+    @Test
+    public void testBug54602d() throws Exception {
+        // Invalid UTF-8
+        doTestUriDecoding("/foo%ff", "UTF-8", "/foo\uFFFD");
+    }
+
+    @Test
+    public void testBug54602e() throws Exception {
+        // Invalid UTF-8
+        doTestUriDecoding("/foo%ed%a0%80", "UTF-8", "/foo\uFFFD\uFFFD\uFFFD");
+    }
+
+    private void doTestUriDecoding(String path, String encoding,
+            String expectedPathInfo) throws Exception{
+
+        // Setup Tomcat instance
+        Tomcat tomcat = getTomcatInstance();
+
+        tomcat.getConnector().setURIEncoding(encoding);
+
+        // Must have a real docBase - just use temp
+        Context ctx =
+            tomcat.addContext("/", System.getProperty("java.io.tmpdir"));
+
+        PathInfoServlet servlet = new PathInfoServlet();
+        Tomcat.addServlet(ctx, "servlet", servlet);
+        ctx.addServletMapping("/*", "servlet");
+
+        tomcat.start();
+
+        int rc = getUrl("http://localhost:"; + getPort() + path,
+                new ByteChunk(), null);
+        Assert.assertEquals(HttpServletResponse.SC_OK, rc);
+
+        Assert.assertEquals(expectedPathInfo, servlet.getPathInfo());
+    }
+
+    private static class PathInfoServlet extends HttpServlet {
+
+        private static final long serialVersionUID = 1L;
+
+        private String pathInfo = null;
+
+        public String getPathInfo() {
+            return pathInfo;
+        }
+
+        @Override
+        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
+                throws ServletException, IOException {
+
+            // Not thread safe
+            pathInfo = req.getPathInfo();
+        }
     }
 }

Copied: 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java 
(from r1452501, 
tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java)
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java?p2=tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&p1=tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java&r1=1452501&r2=1452797&rev=1452797&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/catalina/connector/TestInputBuffer.java 
(original)
+++ 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestInputBuffer.java 
Tue Mar  5 14:04:57 2013
@@ -33,8 +33,8 @@ import org.apache.catalina.startup.Tomca
 import org.apache.catalina.startup.TomcatBaseTest;
 import org.apache.tomcat.util.buf.B2CConverter;
 import org.apache.tomcat.util.buf.ByteChunk;
-import org.apache.tomcat.util.buf.TestUtf8Extended;
-import org.apache.tomcat.util.buf.TestUtf8Extended.Utf8TestCase;
+import org.apache.tomcat.util.buf.TestUtf8;
+import org.apache.tomcat.util.buf.TestUtf8.Utf8TestCase;
 
 public class TestInputBuffer extends TomcatBaseTest {
 
@@ -48,7 +48,7 @@ public class TestInputBuffer extends Tom
         tomcat.getConnector().setProperty("soTimeout", "300000");
         tomcat.start();
 
-        for (Utf8TestCase testCase : TestUtf8Extended.TEST_CASES) {
+        for (Utf8TestCase testCase : TestUtf8.TEST_CASES) {
             String expected = null;
             if (testCase.invalidIndex == -1) {
                 expected = testCase.outputReplaced;
@@ -61,8 +61,6 @@ public class TestInputBuffer extends Tom
     private void doUtf8BodyTest(String description, int[] input,
             String expected) throws Exception {
 
-        System.out.println(description);
-
         byte[] bytes = new byte[input.length];
         for (int i = 0; i < input.length; i++) {
             bytes[i] = (byte) input[i];

Modified: 
tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java 
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java 
Tue Mar  5 14:04:57 2013
@@ -428,7 +428,7 @@ public class TestWebSocket extends Tomca
             bc.setEnd(len);
 
             B2CConverter b2c = new B2CConverter("UTF-8");
-            b2c.convert(bc, cc);
+            b2c.convert(bc, cc, true);
 
             return cc.toString();
         }

Modified: 
tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java 
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java 
Tue Mar  5 14:04:57 2013
@@ -17,6 +17,7 @@
 package org.apache.tomcat.util.buf;
 
 import java.nio.charset.Charset;
+import java.nio.charset.MalformedInputException;
 
 import org.junit.Assert;
 import org.junit.Test;
@@ -26,6 +27,10 @@ public class TestB2CConverter {
     private static final byte[] UTF16_MESSAGE =
             new byte[] {-2, -1, 0, 65, 0, 66, 0, 67};
 
+    private static final byte[] UTF8_INVALID = new byte[] {-8, -69, -73, -77};
+
+    private static final byte[] UTF8_PARTIAL = new byte[] {-50};
+
     @Test
     public void testSingleMessage() throws Exception {
         testMessages(1);
@@ -45,12 +50,12 @@ public class TestB2CConverter {
         B2CConverter conv = new B2CConverter("UTF-16");
 
         ByteChunk bc = new ByteChunk();
-        CharChunk cc = new CharChunk();
+        CharChunk cc = new CharChunk(32);
 
 
         for (int i = 0; i < msgCount; i++) {
             bc.append(UTF16_MESSAGE, 0, UTF16_MESSAGE.length);
-            conv.convert(bc, cc);
+            conv.convert(bc, cc, true);
             Assert.assertEquals("ABC", cc.toString());
             bc.recycle();
             cc.recycle();
@@ -83,4 +88,52 @@ public class TestB2CConverter {
         Assert.assertTrue("Limit needs to be at least " + maxLeftover,
                 maxLeftover <= B2CConverter.LEFTOVER_SIZE);
     }
+
+    // TODO Work-around bug in UTF8 decoder
+    //@Test(expected=MalformedInputException.class)
+    public void testBug54602a() throws Exception {
+        // Check invalid input is rejected straight away
+        B2CConverter conv = new B2CConverter("UTF-8");
+        ByteChunk bc = new ByteChunk();
+        CharChunk cc = new CharChunk();
+
+        bc.append(UTF8_INVALID, 0, UTF8_INVALID.length);
+        cc.allocate(bc.getLength(), -1);
+
+        conv.convert(bc, cc, false);
+    }
+
+    @Test(expected=MalformedInputException.class)
+    public void testBug54602b() throws Exception {
+        // Check partial input is rejected
+        B2CConverter conv = new B2CConverter("UTF-8");
+        ByteChunk bc = new ByteChunk();
+        CharChunk cc = new CharChunk();
+
+        bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length);
+        cc.allocate(bc.getLength(), -1);
+
+        conv.convert(bc, cc, true);
+    }
+
+    @Test
+    public void testBug54602c() throws Exception {
+        // Check partial input is rejected once it is known to be all available
+        B2CConverter conv = new B2CConverter("UTF-8");
+        ByteChunk bc = new ByteChunk();
+        CharChunk cc = new CharChunk();
+
+        bc.append(UTF8_PARTIAL, 0, UTF8_PARTIAL.length);
+        cc.allocate(bc.getLength(), -1);
+
+        conv.convert(bc, cc, false);
+
+        Exception e = null;
+        try {
+            conv.convert(bc, cc, true);
+        } catch (MalformedInputException mie) {
+            e = mie;
+        }
+        Assert.assertNotNull(e);
+    }
 }

Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1452797&r1=1452796&r2=1452797&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Tue Mar  5 14:04:57 2013
@@ -96,6 +96,18 @@
         between requests to ensure an error in one request does not trigger a
         failure in the next request. (markt)
       </fix>
+      <fix>
+        Use the newly added improved UTF-8 decoder for decoding UTF-8 encoded
+        URIs and UTF-8 encoded request bodies. Invalid UTF-8 URIs will not
+        cause an error but will make use of the replacement character when an
+        error is detected. This will allow web applications to handle the URI
+        which will most likely result in a 404 response. The fall-back to
+        decoding with ISO-8859-1 if UTF-8 decoding fails has been removed.
+        Invalid UTF-8 sequences in a request body will trigger an IOException.
+        The way the decoder is used has also been improved. The notable change
+        is that invalid sequences at the end of the input now trigger an error
+        rather than being silebtly swallowed. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Coyote">



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to