Author: markt
Date: Fri Apr 13 16:33:33 2018
New Revision: 1829086

URL: http://svn.apache.org/viewvc?rev=1829086&view=rev
Log:
Fix a rare edge case that is unlikely to occur in real usage. This edge case 
meant that writing long streams of UTF-8 characters to the HTTP response that 
consisted almost entirely of surrogate pairs could result in one surrogate pair 
being dropped.

Modified:
    tomcat/trunk/java/org/apache/catalina/connector/OutputBuffer.java
    tomcat/trunk/test/org/apache/catalina/connector/TestOutputBuffer.java
    tomcat/trunk/webapps/docs/changelog.xml

Modified: tomcat/trunk/java/org/apache/catalina/connector/OutputBuffer.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/connector/OutputBuffer.java?rev=1829086&r1=1829085&r2=1829086&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/connector/OutputBuffer.java (original)
+++ tomcat/trunk/java/org/apache/catalina/connector/OutputBuffer.java Fri Apr 13 16:33:33 2018
@@ -431,6 +431,16 @@ public class OutputBuffer extends Writer
             }
             if (from.remaining() > 0) {
                 flushByteBuffer();
+            } else if (conv.isUndeflow() && bb.limit() > bb.capacity() - 4) {
+                // Handle an edge case. There are no more chars to write at the
+                // moment but there is a leftover character in the converter
+                // which must be part of a surrogate pair. The byte buffer does
+                // not have enough space left to output the bytes for this pair
+                // once it is complete (it will require 4 bytes) so flush now to
+                // prevent the bytes for the leftover char and the rest of the
+                // surrogate pair yet to be written from being lost.
+                // See TestOutputBuffer#testUtf8SurrogateBody()
+                flushByteBuffer();
             }
         }
 

Modified: tomcat/trunk/test/org/apache/catalina/connector/TestOutputBuffer.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/connector/TestOutputBuffer.java?rev=1829086&r1=1829085&r2=1829086&view=diff
==============================================================================
--- tomcat/trunk/test/org/apache/catalina/connector/TestOutputBuffer.java (original)
+++ tomcat/trunk/test/org/apache/catalina/connector/TestOutputBuffer.java Fri Apr 13 16:33:33 2018
@@ -19,6 +19,7 @@ package org.apache.catalina.connector;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.Writer;
+import java.nio.charset.StandardCharsets;
 
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServlet;
@@ -156,4 +157,58 @@ public class TestOutputBuffer extends To
             w.write("OK");
         }
     }
+
+
+    @Test
+    public void testUtf8SurrogateBody() throws Exception {
+        // Create test data. This is carefully constructed to trigger the edge
+        // case. Small variations may cause the test to miss the edge case.
+        StringBuffer sb = new StringBuffer();
+        sb.append("a");
+
+        for (int i = 0x10000; i < 0x11000; i++) {
+            char[] chars = Character.toChars(i);
+            sb.append(chars);
+        }
+        String data = sb.toString();
+
+        Tomcat tomcat = getTomcatInstance();
+        Context root = tomcat.addContext("", TEMP_DIR);
+        Tomcat.addServlet(root, "Test", new Utf8WriteChars(data));
+        root.addServletMappingDecoded("/test", "Test");
+
+        tomcat.start();
+
+        ByteChunk bc = new ByteChunk();
+        getUrl("http://localhost:" + getPort() + "/test", bc, null);
+
+        bc.setCharset(StandardCharsets.UTF_8);
+        Assert.assertEquals(data, bc.toString());
+    }
+
+
+    private static class Utf8WriteChars extends HttpServlet {
+
+        private static final long serialVersionUID = 1L;
+
+        private final char[] chars;
+
+        public Utf8WriteChars(String data) {
+            chars = data.toCharArray();
+        }
+
+        @Override
+        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
+                throws ServletException, IOException {
+
+            resp.setCharacterEncoding("UTF-8");
+            resp.setContentType("text/plain");
+            Writer w = resp.getWriter();
+
+            for (int i = 0; i < chars.length; i++) {
+                w.write(chars[i]);
+            }
+        }
+    }
+
 }

Modified: tomcat/trunk/webapps/docs/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1829086&r1=1829085&r2=1829086&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Fri Apr 13 16:33:33 2018
@@ -62,6 +62,12 @@
         type="javax.sql.XADataSource"</code>. Patch provided by Masafumi Miura.
         (csutherl)
       </fix>
+      <fix>
+        Fix a rare edge case that is unlikely to occur in real usage. This edge
+        case meant that writing long streams of UTF-8 characters to the HTTP
+        response that consisted almost entirely of surrogate pairs could result
+        in one surrogate pair being dropped. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Coyote">



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to