This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 8.5.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/8.5.x by this push:
     new f9ddfe9  Add a check that the URIEncoding is a superset of US-ASCII.
f9ddfe9 is described below

commit f9ddfe9e378bb31dc458b1c814ea9ca440580a9a
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Fri Mar 13 11:36:54 2020 +0000

    Add a check that the URIEncoding is a superset of US-ASCII.
    
    This is a requirement of RFC7230, section 3.
---
 java/org/apache/catalina/connector/Connector.java  | 10 ++-
 .../catalina/connector/LocalStrings.properties     |  1 +
 java/org/apache/tomcat/util/buf/CharsetUtil.java   | 58 ++++++++++++++
 .../apache/tomcat/util/buf/TestCharsetUtil.java    | 89 ++++++++++++++++++++++
 webapps/docs/changelog.xml                         |  9 +++
 5 files changed, 164 insertions(+), 3 deletions(-)

diff --git a/java/org/apache/catalina/connector/Connector.java 
b/java/org/apache/catalina/connector/Connector.java
index 518893c..23488a9 100644
--- a/java/org/apache/catalina/connector/Connector.java
+++ b/java/org/apache/catalina/connector/Connector.java
@@ -41,6 +41,7 @@ import org.apache.juli.logging.Log;
 import org.apache.juli.logging.LogFactory;
 import org.apache.tomcat.util.IntrospectionUtils;
 import org.apache.tomcat.util.buf.B2CConverter;
+import org.apache.tomcat.util.buf.CharsetUtil;
 import org.apache.tomcat.util.net.SSLHostConfig;
 import org.apache.tomcat.util.net.openssl.OpenSSLImplementation;
 import org.apache.tomcat.util.res.StringManager;
@@ -805,10 +806,13 @@ public class Connector extends LifecycleMBeanBase  {
      */
     public void setURIEncoding(String URIEncoding) {
         try {
-            uriCharset = B2CConverter.getCharset(URIEncoding);
+             Charset charset = B2CConverter.getCharset(URIEncoding);
+             if (!CharsetUtil.isAsciiSuperset(charset)) {
+                 log.error(sm.getString("coyoteConnector.notAsciiSuperset", 
URIEncoding));
+             }
+             uriCharset = charset;
         } catch (UnsupportedEncodingException e) {
-            log.error(sm.getString("coyoteConnector.invalidEncoding",
-                    URIEncoding, uriCharset.name()), e);
+            log.error(sm.getString("coyoteConnector.invalidEncoding", 
URIEncoding, uriCharset.name()), e);
         }
         setProperty("uRIEncoding", URIEncoding);
     }
diff --git a/java/org/apache/catalina/connector/LocalStrings.properties 
b/java/org/apache/catalina/connector/LocalStrings.properties
index 97e284c..364fd66 100644
--- a/java/org/apache/catalina/connector/LocalStrings.properties
+++ b/java/org/apache/catalina/connector/LocalStrings.properties
@@ -24,6 +24,7 @@ coyoteAdapter.nullRequest=An asynchronous dispatch may only 
happen on an existin
 
 coyoteConnector.invalidEncoding=The encoding [{0}] is not recognised by the 
JRE. The Connector will continue to use [{1}]
 coyoteConnector.invalidPort=The connector cannot start since the specified 
port value of [{0}] is invalid
+coyoteConnector.notAsciiSuperset=The encoding [{0}] is not a superset of ASCII 
as required by RFC 7230. This may have unexpected side effects
 coyoteConnector.parseBodyMethodNoTrace=TRACE method MUST NOT include an entity 
(see RFC 2616 Section 9.6)
 coyoteConnector.protocolHandlerDestroyFailed=Protocol handler destroy failed
 coyoteConnector.protocolHandlerInitializationFailed=Protocol handler 
initialization failed
diff --git a/java/org/apache/tomcat/util/buf/CharsetUtil.java 
b/java/org/apache/tomcat/util/buf/CharsetUtil.java
new file mode 100644
index 0000000..fc0a09e
--- /dev/null
+++ b/java/org/apache/tomcat/util/buf/CharsetUtil.java
@@ -0,0 +1,58 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+
+public class CharsetUtil {
+
+    private CharsetUtil() {
+        // Utility class. Hide default constructor.
+    }
+
+
+    public static boolean isAsciiSuperset(Charset charset) {
+        // Bytes 0x00 to 0x7F must decode to the first 128 Unicode characters
+        CharsetDecoder decoder = charset.newDecoder();
+        ByteBuffer inBytes = ByteBuffer.allocate(1);
+        CharBuffer outChars;
+        for (int i = 0; i < 128; i++) {
+            inBytes.clear();
+            inBytes.put((byte) i);
+            inBytes.flip();
+            try {
+                outChars = decoder.decode(inBytes);
+            } catch (CharacterCodingException e) {
+                return false;
+            }
+            try {
+                if (outChars.get() != i) {
+                    return false;
+                }
+            } catch (BufferUnderflowException e) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetUtil.java 
b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java
new file mode 100644
index 0000000..b6b7790
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java
@@ -0,0 +1,89 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestCharsetUtil {
+
+    /*
+     * Check the standard character sets return the expected values
+     */
+    @Test
+    public void testIsAsciiSupersetStandardCharsets() {
+        
Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.US_ASCII));
+        
Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.ISO_8859_1));
+        Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_8));
+
+        
Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16));
+        
Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16BE));
+        
Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16LE));
+    }
+
+
+    /*
+     * More comprehensive test that checks that, part from where the encoding
+     * overlaps with ASCII, no valid ASCII bytes are used.
+     */
+    @Test
+    public void testIsAcsiiSupersetAll() {
+        for (Charset charset : Charset.availableCharsets().values()) {
+            System.out.println("Testing: " + charset.name());
+
+            if (CharsetUtil.isAsciiSuperset(charset)) {
+                // Run a more in-depth check to make sure
+                // Encoding Unicode 128 onwards should never generate bytes 0 
to 127.
+                CharsetEncoder encoder = charset.newEncoder();
+                CharBuffer inChars = CharBuffer.allocate(8);
+                ByteBuffer outBytes;
+
+                for (int i = 128; i < Character.MAX_CODE_POINT; i++) {
+                    inChars.clear();
+                    char[] chars = Character.toChars(i);
+                    for (char c : chars) {
+                        inChars.append(c);
+                    }
+                    inChars.flip();
+                    try {
+                        outBytes = encoder.encode(inChars);
+                    } catch (CharacterCodingException e) {
+                        // Ignore. The encoding can't handle the codepoint. 
That is fine.
+                        continue;
+                    }
+                    outBytes.flip();
+                    while (outBytes.hasRemaining()) {
+                        byte b = outBytes.get();
+                        // All bytes should have the highest bit set
+                        if ((b & 0x80) == 0) {
+                            Assert.fail("[" + charset.name() + " is not a 
superset of ASCII");
+                        }
+                    }
+                }
+            } else {
+                System.out.println("Not: " + charset.name());
+            }
+        }
+    }
+}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 214ea83..4666378 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -45,6 +45,15 @@
   issues do not "pop up" wrt. others).
 -->
 <section name="Tomcat 8.5.54 (markt)" rtext="in development">
+  <subsection name="Coyote">
+    <changelog>
+      <add>
+        When configuring an HTTP Connector, warn if the encoding specified for
+        <code>URIEncoding</code> is not a superset of US-ASCII as required by
+        RFC7230. (markt)
+      </add>
+    </changelog>
+  </subsection>
 </section>
 <section name="Tomcat 8.5.53 (markt)" rtext="release in progress]">
   <subsection name="Coyote">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to