Author: markt Date: Fri Apr 28 18:18:06 2017 New Revision: 1793121 URL: http://svn.apache.org/viewvc?rev=1793121&view=rev Log: Update the default URIEncoding for a Connector to UTF-8 as required by the Servlet 4.0 specification. Start to move towards using Charset rather than String internally to reduce the number of calls required to B2CConverter.getCharset() during a request.
Modified: tomcat/trunk/java/org/apache/catalina/connector/Connector.java tomcat/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java tomcat/trunk/java/org/apache/catalina/connector/LocalStrings.properties tomcat/trunk/java/org/apache/catalina/core/ApplicationPushBuilder.java tomcat/trunk/test/org/apache/catalina/core/TestApplicationPushBuilder.java tomcat/trunk/webapps/docs/changelog.xml Modified: tomcat/trunk/java/org/apache/catalina/connector/Connector.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/connector/Connector.java?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/connector/Connector.java (original) +++ tomcat/trunk/java/org/apache/catalina/connector/Connector.java Fri Apr 28 18:18:06 2017 @@ -16,14 +16,16 @@ */ package org.apache.catalina.connector; +import java.io.UnsupportedEncodingException; import java.net.InetAddress; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; import java.util.Locale; import javax.management.ObjectName; -import org.apache.catalina.Globals; import org.apache.catalina.LifecycleException; import org.apache.catalina.LifecycleState; import org.apache.catalina.Service; @@ -37,6 +39,7 @@ import org.apache.coyote.http11.Abstract import org.apache.juli.logging.Log; import org.apache.juli.logging.LogFactory; import org.apache.tomcat.util.IntrospectionUtils; +import org.apache.tomcat.util.buf.B2CConverter; import org.apache.tomcat.util.net.SSLHostConfig; import org.apache.tomcat.util.net.openssl.OpenSSLImplementation; import org.apache.tomcat.util.res.StringManager; @@ -105,11 +108,6 @@ public class Connector extends Lifecycle this.protocolHandler = p; } - if (!Globals.STRICT_SERVLET_COMPLIANCE) { - URIEncoding = "UTF-8"; - URIEncodingLower = URIEncoding.toLowerCase(Locale.ENGLISH); - } - // Default for 
Connector depends on this system property setThrowOnFailure(Boolean.getBoolean("org.apache.catalina.startup.EXIT_ON_INIT_FAILURE")); } @@ -263,11 +261,23 @@ public class Connector extends Lifecycle /** * URI encoding. + * + * @deprecated This will be removed in 9.0.x onwards */ + @Deprecated protected String URIEncoding = null; + + + /** + * @deprecated This will be removed in 9.0.x onwards + */ + @Deprecated protected String URIEncodingLower = null; + private Charset uriCharset = StandardCharsets.UTF_8; + + /** * URI encoding as body. */ @@ -689,33 +699,45 @@ public class Connector extends Lifecycle /** - * @return the character encoding to be used for the URI using the original - * case. + * @return the name of character encoding to be used for the URI using the + * original case. */ public String getURIEncoding() { - return this.URIEncoding; + return uriCharset.name(); } /** * @return the character encoding to be used for the URI using lower case. + * + * @deprecated This will be removed in 9.0.x onwards */ + @Deprecated public String getURIEncodingLower() { - return this.URIEncodingLower; + return uriCharset.name().toLowerCase(Locale.ENGLISH); } /** + * + * @return The Charset to use to convert raw URI bytes (after %nn decoding) + * to characters. This will never be null + */ + public Charset getURICharset() { + return uriCharset; + } + + /** * Set the URI encoding to be used for the URI. * * @param URIEncoding The new URI character encoding. 
*/ public void setURIEncoding(String URIEncoding) { - this.URIEncoding = URIEncoding; - if (URIEncoding == null) { - URIEncodingLower = null; - } else { - this.URIEncodingLower = URIEncoding.toLowerCase(Locale.ENGLISH); + try { + uriCharset = B2CConverter.getCharset(URIEncoding); + } catch (UnsupportedEncodingException e) { + log.warn(sm.getString("coyoteConnector.invalidEncoding", + URIEncoding, uriCharset.name()), e); } } Modified: tomcat/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java (original) +++ tomcat/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java Fri Apr 28 18:18:06 2017 @@ -17,7 +17,6 @@ package org.apache.catalina.connector; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.EnumSet; import java.util.concurrent.atomic.AtomicBoolean; @@ -892,24 +891,14 @@ public class CoyoteAdapter implements Ad // What encoding to use? 
Some platforms, eg z/os, use a default // encoding that doesn't give the expected result so be explicit - String enc = connector.getURIEncodingLower(); - if (enc == null) { - enc = "iso-8859-1"; - } - Charset charset = null; - try { - charset = B2CConverter.getCharsetLower(enc); - } catch (UnsupportedEncodingException e1) { - log.warn(sm.getString("coyoteAdapter.parsePathParam", - enc)); - } + Charset charset = connector.getURICharset(); if (log.isDebugEnabled()) { log.debug(sm.getString("coyoteAdapter.debug", "uriBC", uriBC.toString())); log.debug(sm.getString("coyoteAdapter.debug", "semicolon", String.valueOf(semicolon))); - log.debug(sm.getString("coyoteAdapter.debug", "enc", enc)); + log.debug(sm.getString("coyoteAdapter.debug", "enc", charset.name())); } while (semicolon > -1) { Modified: tomcat/trunk/java/org/apache/catalina/connector/LocalStrings.properties URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/connector/LocalStrings.properties?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/connector/LocalStrings.properties (original) +++ tomcat/trunk/java/org/apache/catalina/connector/LocalStrings.properties Fri Apr 28 18:18:06 2017 @@ -22,6 +22,7 @@ coyoteAdapter.debug=The variable [{0}] h coyoteAdapter.invalidEncoding=Invalid URI encoding, using HTTP default coyoteAdapter.parsePathParam=Unable to parse the path parameters using encoding [{0}]. The path parameters in the URL will be ignored. +coyoteConnector.invalidEncoding=The encoding [{0}] is not recognised by the JRE. 
The Connector will continue to use [{1}] coyoteConnector.invalidPort=The connector cannot start since the specified port value of [{0}] is invalid coyoteConnector.protocolHandlerDestroyFailed=Protocol handler destroy failed coyoteConnector.protocolHandlerInitializationFailed=Protocol handler initialization failed Modified: tomcat/trunk/java/org/apache/catalina/core/ApplicationPushBuilder.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/core/ApplicationPushBuilder.java?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/core/ApplicationPushBuilder.java (original) +++ tomcat/trunk/java/org/apache/catalina/core/ApplicationPushBuilder.java Fri Apr 28 18:18:06 2017 @@ -16,7 +16,6 @@ */ package org.apache.catalina.core; -import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; @@ -39,7 +38,6 @@ import org.apache.catalina.Context; import org.apache.catalina.connector.Request; import org.apache.catalina.util.SessionConfig; import org.apache.coyote.ActionCode; -import org.apache.tomcat.util.buf.B2CConverter; import org.apache.tomcat.util.buf.HexUtils; import org.apache.tomcat.util.collections.CaseInsensitiveKeyMap; import org.apache.tomcat.util.http.CookieProcessor; @@ -347,7 +345,7 @@ public class ApplicationPushBuilder impl // Undecoded path - just %nn encoded pushTarget.requestURI().setString(pushPath); pushTarget.decodedURI().setString(decode(pushPath, - catalinaRequest.getConnector().getURIEncodingLower())); + catalinaRequest.getConnector().getURICharset())); // Query string if (pushQueryString == null && queryString != null) { @@ -373,7 +371,7 @@ public class ApplicationPushBuilder impl // Package private so it can be tested. charsetName must be in lower case. 
- static String decode(String input, String charsetName) { + static String decode(String input, Charset charset) { int start = input.indexOf('%'); int end = 0; @@ -382,15 +380,6 @@ public class ApplicationPushBuilder impl return input; } - Charset charset; - try { - charset = B2CConverter.getCharsetLower(charsetName); - } catch (UnsupportedEncodingException uee) { - // Impossible since original request would have triggered an error - // before reaching here - throw new IllegalStateException(uee); - } - StringBuilder result = new StringBuilder(input.length()); while (start != -1) { // Found the start of a %nn sequence. Copy everything form the last @@ -401,7 +390,7 @@ public class ApplicationPushBuilder impl while (end <input.length() && input.charAt(end) == '%') { end += 3; } - result.append(decode(input.substring(start, end), charset)); + result.append(decodePercentSequence(input.substring(start, end), charset)); start = input.indexOf('%', end); } // Append the remaining text @@ -411,11 +400,11 @@ public class ApplicationPushBuilder impl } - private static String decode(String percentSequence, Charset charset) { - byte[] bytes = new byte[percentSequence.length()/3]; + private static String decodePercentSequence(String sequence, Charset charset) { + byte[] bytes = new byte[sequence.length()/3]; for (int i = 0; i < bytes.length; i += 3) { - bytes[i] = (byte) (HexUtils.getDec(percentSequence.charAt(1 + 3 * i)) << 4 + - HexUtils.getDec(percentSequence.charAt(2 + 3 * i))); + bytes[i] = (byte) (HexUtils.getDec(sequence.charAt(1 + 3 * i)) << 4 + + HexUtils.getDec(sequence.charAt(2 + 3 * i))); } return new String(bytes, charset); Modified: tomcat/trunk/test/org/apache/catalina/core/TestApplicationPushBuilder.java URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/core/TestApplicationPushBuilder.java?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- 
tomcat/trunk/test/org/apache/catalina/core/TestApplicationPushBuilder.java (original) +++ tomcat/trunk/test/org/apache/catalina/core/TestApplicationPushBuilder.java Fri Apr 28 18:18:06 2017 @@ -16,6 +16,9 @@ */ package org.apache.catalina.core; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + import org.junit.Assert; import org.junit.Test; @@ -23,31 +26,31 @@ public class TestApplicationPushBuilder @Test public void test01() { - doTest("foo", "utf-8", "foo"); + doTest("foo", StandardCharsets.UTF_8, "foo"); } @Test public void test02() { - doTest("/foo", "utf-8", "/foo"); + doTest("/foo", StandardCharsets.UTF_8, "/foo"); } @Test public void test03() { - doTest("%20foo", "utf-8", " foo"); + doTest("%20foo", StandardCharsets.UTF_8, " foo"); } @Test public void test04() { - doTest("fo%20o", "utf-8", "fo o"); + doTest("fo%20o", StandardCharsets.UTF_8, "fo o"); } @Test public void test05() { - doTest("foo%20", "utf-8", "foo "); + doTest("foo%20", StandardCharsets.UTF_8, "foo "); } - private void doTest(String input, String charset, String expected) { + private void doTest(String input, Charset charset, String expected) { String result = ApplicationPushBuilder.decode(input, charset); Assert.assertEquals(expected, result); } Modified: tomcat/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1793121&r1=1793120&r2=1793121&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/changelog.xml (original) +++ tomcat/trunk/webapps/docs/changelog.xml Fri Apr 28 18:18:06 2017 @@ -72,6 +72,11 @@ <code>hostName</code> is configured (because <code>*</code> is a reserved character for JMX object names). (markt) </fix> + <update> + Update the default <code>URIEncoding</code> for a <code>Connector</code> + to <code>UTF-8</code> as required by the Servlet 4.0 specification. 
+ (markt) + </update> </changelog> </subsection> <subsection name="Jasper"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org For additional commands, e-mail: dev-help@tomcat.apache.org