This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 7.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/7.0.x by this push:
     new fcf3697  Add a check that the URIEncoding is a superset of US-ASCII.
fcf3697 is described below

commit fcf369777467532f093197657caf12005f590931
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Fri Mar 13 11:36:54 2020 +0000

    Add a check that the URIEncoding is a superset of US-ASCII.
    
    This is a requirement of RFC7230, section 3.
---
 java/org/apache/catalina/connector/Connector.java  | 13 +++-
 .../catalina/connector/LocalStrings.properties     |  1 +
 java/org/apache/tomcat/util/buf/CharsetUtil.java   | 58 +++++++++++++++++
 .../apache/tomcat/util/buf/TestCharsetUtil.java    | 73 ++++++++++++++++++++++
 webapps/docs/changelog.xml                         |  9 +++
 5 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/java/org/apache/catalina/connector/Connector.java 
b/java/org/apache/catalina/connector/Connector.java
index 7d7d11d..28ad468 100644
--- a/java/org/apache/catalina/connector/Connector.java
+++ b/java/org/apache/catalina/connector/Connector.java
@@ -16,7 +16,9 @@
  */
 package org.apache.catalina.connector;
 
+import java.io.UnsupportedEncodingException;
 import java.net.InetAddress;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -35,6 +37,8 @@ import org.apache.coyote.ProtocolHandler;
 import org.apache.juli.logging.Log;
 import org.apache.juli.logging.LogFactory;
 import org.apache.tomcat.util.IntrospectionUtils;
+import org.apache.tomcat.util.buf.B2CConverter;
+import org.apache.tomcat.util.buf.CharsetUtil;
 import org.apache.tomcat.util.http.mapper.Mapper;
 import org.apache.tomcat.util.res.StringManager;
 
@@ -793,7 +797,14 @@ public class Connector extends LifecycleMBeanBase  {
      */
     public void setURIEncoding(String URIEncoding) {
         this.URIEncoding = URIEncoding;
-        setProperty("uRIEncoding", URIEncoding);
+        try {
+             Charset charset = B2CConverter.getCharset(URIEncoding);
+             if (!CharsetUtil.isAsciiSuperset(charset)) {
+                 log.error(sm.getString("coyoteConnector.notAsciiSuperset", 
URIEncoding));
+             }
+        } catch (UnsupportedEncodingException e) {
+            // Ignore. A warning will be logged in the CoyoteAdapter
+        }
     }
 
 
diff --git a/java/org/apache/catalina/connector/LocalStrings.properties 
b/java/org/apache/catalina/connector/LocalStrings.properties
index e8f9f46..1336726 100644
--- a/java/org/apache/catalina/connector/LocalStrings.properties
+++ b/java/org/apache/catalina/connector/LocalStrings.properties
@@ -28,6 +28,7 @@ coyoteAdapter.parsePathParam=Unable to parse the path 
parameters using encoding
 coyoteConnector.MapperRegistration=register Mapper: [{0}]
 coyoteConnector.cannotRegisterProtocol=Cannot register MBean for the Protocol
 coyoteConnector.invalidPort=The connector cannot start since the specified 
port value of [{0}] is invalid
+coyoteConnector.notAsciiSuperset=The encoding [{0}] is not a superset of ASCII 
as required by RFC 7230. This may have unexpected side effects
 coyoteConnector.parseBodyMethodNoTrace=TRACE method MUST NOT include an entity 
(see RFC 2616 Section 9.6)
 coyoteConnector.protocolHandlerDestroyFailed=Protocol handler destroy failed
 coyoteConnector.protocolHandlerInitializationFailed=Protocol handler 
initialization failed
diff --git a/java/org/apache/tomcat/util/buf/CharsetUtil.java 
b/java/org/apache/tomcat/util/buf/CharsetUtil.java
new file mode 100644
index 0000000..fc0a09e
--- /dev/null
+++ b/java/org/apache/tomcat/util/buf/CharsetUtil.java
@@ -0,0 +1,58 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+
+public class CharsetUtil {
+
+    private CharsetUtil() {
+        // Utility class. Hide default constructor.
+    }
+
+
+    public static boolean isAsciiSuperset(Charset charset) {
+        // Bytes 0x00 to 0x7F must decode to the first 128 Unicode characters
+        CharsetDecoder decoder = charset.newDecoder();
+        ByteBuffer inBytes = ByteBuffer.allocate(1);
+        CharBuffer outChars;
+        for (int i = 0; i < 128; i++) {
+            inBytes.clear();
+            inBytes.put((byte) i);
+            inBytes.flip();
+            try {
+                outChars = decoder.decode(inBytes);
+            } catch (CharacterCodingException e) {
+                return false;
+            }
+            try {
+                if (outChars.get() != i) {
+                    return false;
+                }
+            } catch (BufferUnderflowException e) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetUtil.java 
b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java
new file mode 100644
index 0000000..08f8a55
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java
@@ -0,0 +1,73 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestCharsetUtil {
+
+    /*
+     * More comprehensive test that checks that, part from where the encoding
+     * overlaps with ASCII, no valid ASCII bytes are used.
+     */
+    @Test
+    public void testIsAcsiiSupersetAll() {
+        for (Charset charset : Charset.availableCharsets().values()) {
+            System.out.println("Testing: " + charset.name());
+
+            if (CharsetUtil.isAsciiSuperset(charset)) {
+                // Run a more in-depth check to make sure
+                // Encoding Unicode 128 onwards should never generate bytes 0 
to 127.
+                CharsetEncoder encoder = charset.newEncoder();
+                CharBuffer inChars = CharBuffer.allocate(8);
+                ByteBuffer outBytes;
+
+                for (int i = 128; i < Character.MAX_CODE_POINT; i++) {
+                    inChars.clear();
+                    char[] chars = Character.toChars(i);
+                    for (char c : chars) {
+                        inChars.append(c);
+                    }
+                    inChars.flip();
+                    try {
+                        outBytes = encoder.encode(inChars);
+                    } catch (CharacterCodingException e) {
+                        // Ignore. The encoding can't handle the codepoint. 
That is fine.
+                        continue;
+                    }
+                    outBytes.flip();
+                    while (outBytes.hasRemaining()) {
+                        byte b = outBytes.get();
+                        // All bytes should have the highest bit set
+                        if ((b & 0x80) == 0) {
+                            Assert.fail("[" + charset.name() + " is not a 
superset of ASCII");
+                        }
+                    }
+                }
+            } else {
+                System.out.println("Not: " + charset.name());
+            }
+        }
+    }
+}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 5ae6894..2ac611c 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -60,6 +60,15 @@
   issues do not "pop up" wrt. others).
 -->
 <section name="Tomcat 7.0.104 (violetagg)">
+  <subsection name="Coyote">
+    <changelog>
+      <add>
+        When configuring an HTTP Connector, warn if the encoding specified for
+        <code>URIEncoding</code> is not a superset of US-ASCII as required by
+        RFC7230. (markt)
+      </add>
+    </changelog>
+  </subsection>
 </section>
 <section name="Tomcat 7.0.103 (violetagg)">
   <subsection name="Catalina">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to