This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 8.5.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/8.5.x by this push: new f9ddfe9 Add a check that the URIEncoding is a superset of US-ASCII. f9ddfe9 is described below commit f9ddfe9e378bb31dc458b1c814ea9ca440580a9a Author: Mark Thomas <ma...@apache.org> AuthorDate: Fri Mar 13 11:36:54 2020 +0000 Add a check that the URIEncoding is a superset of US-ASCII. This is a requirement of RFC7230, section 3. --- java/org/apache/catalina/connector/Connector.java | 10 ++- .../catalina/connector/LocalStrings.properties | 1 + java/org/apache/tomcat/util/buf/CharsetUtil.java | 58 ++++++++++++++ .../apache/tomcat/util/buf/TestCharsetUtil.java | 89 ++++++++++++++++++++++ webapps/docs/changelog.xml | 9 +++ 5 files changed, 164 insertions(+), 3 deletions(-) diff --git a/java/org/apache/catalina/connector/Connector.java b/java/org/apache/catalina/connector/Connector.java index 518893c..23488a9 100644 --- a/java/org/apache/catalina/connector/Connector.java +++ b/java/org/apache/catalina/connector/Connector.java @@ -41,6 +41,7 @@ import org.apache.juli.logging.Log; import org.apache.juli.logging.LogFactory; import org.apache.tomcat.util.IntrospectionUtils; import org.apache.tomcat.util.buf.B2CConverter; +import org.apache.tomcat.util.buf.CharsetUtil; import org.apache.tomcat.util.net.SSLHostConfig; import org.apache.tomcat.util.net.openssl.OpenSSLImplementation; import org.apache.tomcat.util.res.StringManager; @@ -805,10 +806,13 @@ public class Connector extends LifecycleMBeanBase { */ public void setURIEncoding(String URIEncoding) { try { - uriCharset = B2CConverter.getCharset(URIEncoding); + Charset charset = B2CConverter.getCharset(URIEncoding); + if (!CharsetUtil.isAsciiSuperset(charset)) { + log.error(sm.getString("coyoteConnector.notAsciiSuperset", URIEncoding)); + } + uriCharset = charset; } catch (UnsupportedEncodingException e) { - log.error(sm.getString("coyoteConnector.invalidEncoding", - URIEncoding, uriCharset.name()), e); + log.error(sm.getString("coyoteConnector.invalidEncoding", URIEncoding, uriCharset.name()), e); } setProperty("uRIEncoding", URIEncoding); } diff --git a/java/org/apache/catalina/connector/LocalStrings.properties b/java/org/apache/catalina/connector/LocalStrings.properties index 97e284c..364fd66 100644 --- a/java/org/apache/catalina/connector/LocalStrings.properties +++ b/java/org/apache/catalina/connector/LocalStrings.properties @@ -24,6 +24,7 @@ coyoteAdapter.nullRequest=An asynchronous dispatch may only happen on an existin coyoteConnector.invalidEncoding=The encoding [{0}] is not recognised by the JRE. The Connector will continue to use [{1}] coyoteConnector.invalidPort=The connector cannot start since the specified port value of [{0}] is invalid +coyoteConnector.notAsciiSuperset=The encoding [{0}] is not a superset of ASCII as required by RFC 7230. This may have unexpected side effects coyoteConnector.parseBodyMethodNoTrace=TRACE method MUST NOT include an entity (see RFC 2616 Section 9.6) coyoteConnector.protocolHandlerDestroyFailed=Protocol handler destroy failed coyoteConnector.protocolHandlerInitializationFailed=Protocol handler initialization failed diff --git a/java/org/apache/tomcat/util/buf/CharsetUtil.java b/java/org/apache/tomcat/util/buf/CharsetUtil.java new file mode 100644 index 0000000..fc0a09e --- /dev/null +++ b/java/org/apache/tomcat/util/buf/CharsetUtil.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; + +public class CharsetUtil { + + private CharsetUtil() { + // Utility class. Hide default constructor. + } + + + public static boolean isAsciiSuperset(Charset charset) { + // Bytes 0x00 to 0x7F must decode to the first 128 Unicode characters + CharsetDecoder decoder = charset.newDecoder(); + ByteBuffer inBytes = ByteBuffer.allocate(1); + CharBuffer outChars; + for (int i = 0; i < 128; i++) { + inBytes.clear(); + inBytes.put((byte) i); + inBytes.flip(); + try { + outChars = decoder.decode(inBytes); + } catch (CharacterCodingException e) { + return false; + } + try { + if (outChars.get() != i) { + return false; + } + } catch (BufferUnderflowException e) { + return false; + } + } + + return true; + } +} diff --git a/test/org/apache/tomcat/util/buf/TestCharsetUtil.java b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java new file mode 100644 index 0000000..b6b7790 --- /dev/null +++ b/test/org/apache/tomcat/util/buf/TestCharsetUtil.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; + +import org.junit.Assert; +import org.junit.Test; + +public class TestCharsetUtil { + + /* + * Check the standard character sets return the expected values + */ + @Test + public void testIsAsciiSupersetStandardCharsets() { + Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.US_ASCII)); + Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.ISO_8859_1)); + Assert.assertTrue(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_8)); + + Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16)); + Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16BE)); + Assert.assertFalse(CharsetUtil.isAsciiSuperset(StandardCharsets.UTF_16LE)); + } + + + /* + * More comprehensive test that checks that, part from where the encoding + * overlaps with ASCII, no valid ASCII bytes are used. + */ + @Test + public void testIsAcsiiSupersetAll() { + for (Charset charset : Charset.availableCharsets().values()) { + System.out.println("Testing: " + charset.name()); + + if (CharsetUtil.isAsciiSuperset(charset)) { + // Run a more in-depth check to make sure + // Encoding Unicode 128 onwards should never generate bytes 0 to 127. + CharsetEncoder encoder = charset.newEncoder(); + CharBuffer inChars = CharBuffer.allocate(8); + ByteBuffer outBytes; + + for (int i = 128; i < Character.MAX_CODE_POINT; i++) { + inChars.clear(); + char[] chars = Character.toChars(i); + for (char c : chars) { + inChars.append(c); + } + inChars.flip(); + try { + outBytes = encoder.encode(inChars); + } catch (CharacterCodingException e) { + // Ignore. The encoding can't handle the codepoint. That is fine. + continue; + } + outBytes.flip(); + while (outBytes.hasRemaining()) { + byte b = outBytes.get(); + // All bytes should have the highest bit set + if ((b & 0x80) == 0) { + Assert.fail("[" + charset.name() + " is not a superset of ASCII"); + } + } + } + } else { + System.out.println("Not: " + charset.name()); + } + } + } +} diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 214ea83..4666378 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -45,6 +45,15 @@ issues do not "pop up" wrt. others). --> <section name="Tomcat 8.5.54 (markt)" rtext="in development"> + <subsection name="Coyote"> + <changelog> + <add> + When configuring an HTTP Connector, warn if the encoding specified for + <code>URIEncoding</code> is not a superset of US-ASCII as required by + RFC7230. (markt) + </add> + </changelog> + </subsection> </section> <section name="Tomcat 8.5.53 (markt)" rtext="release in progress]"> <subsection name="Coyote"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org