Author: markt Date: Wed Apr 25 15:31:48 2018 New Revision: 1830087 URL: http://svn.apache.org/viewvc?rev=1830087&view=rev Log: Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=62273 Implement configuration options to work-around specification non-compliant user agents (including all the major browsers) that do not correctly %nn encode URI paths and query strings as required by RFC 7230 and RFC 3986.
Modified: tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java tomcat/trunk/webapps/docs/changelog.xml tomcat/trunk/webapps/docs/config/http.xml Modified: tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java (original) +++ tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java Wed Apr 25 15:31:48 2018 @@ -95,6 +95,24 @@ public abstract class AbstractHttp11Prot // ------------------------------------------------ HTTP specific properties // ------------------------------------------ managed in the ProtocolHandler + private String relaxedPathChars = null; + public String getRelaxedPathChars() { + return relaxedPathChars; + } + public void setRelaxedPathChars(String relaxedPathChars) { + this.relaxedPathChars = relaxedPathChars; + } + + + private String relaxedQueryChars = null; + public String getRelaxedQueryChars() { + return relaxedQueryChars; + } + public void setRelaxedQueryChars(String relaxedQueryChars) { + this.relaxedQueryChars = relaxedQueryChars; + } + + private boolean allowHostHeaderMismatch = false; /** * Will Tomcat accept an HTTP 1.1 request where the host header does not Modified: tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java (original) +++ tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java Wed Apr 25 15:31:48 2018 @@ -133,6 +133,7 @@ public class Http11InputBuffer implement private int parsingRequestLineQPos = -1; private HeaderParsePosition headerParsePos; private final HeaderParseData headerData = new HeaderParseData(); + private final HttpParser httpParser; /** * Maximum allowed size of the HTTP request line plus headers plus any @@ -149,13 +150,14 @@ public class Http11InputBuffer implement // ----------------------------------------------------------- Constructors public Http11InputBuffer(Request request, int headerBufferSize, - boolean rejectIllegalHeaderName) { + boolean rejectIllegalHeaderName, HttpParser httpParser) { this.request = request; headers = request.getMimeHeaders(); this.headerBufferSize = headerBufferSize; this.rejectIllegalHeaderName = rejectIllegalHeaderName; + this.httpParser = httpParser; filterLibrary = new InputFilter[0]; activeFilters = new InputFilter[0]; @@ -456,10 +458,10 @@ public class Http11InputBuffer implement end = pos; } else if (chr == Constants.QUESTION && parsingRequestLineQPos == -1) { parsingRequestLineQPos = pos; - } else if (parsingRequestLineQPos != -1 && !HttpParser.isQuery(chr)) { + } else if (parsingRequestLineQPos != -1 && !httpParser.isQueryRelaxed(chr)) { // %nn decoding will be checked at the point of decoding throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget")); - } else if (HttpParser.isNotRequestTarget(chr)) { + } else if (httpParser.isNotRequestTargetRelaxed(chr)) { // This is a general check that aims to catch problems early // Detailed checking of each part of the request target will // happen in Http11Processor#prepareRequest() Modified: tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java (original) +++ tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java Wed Apr 25 15:31:48 2018 @@ -87,6 +87,9 @@ public class Http11Processor extends Abs private final Http11OutputBuffer outputBuffer; + private final HttpParser httpParser; + + /** * Tracks how many internal filters are in the filter library so they * are skipped when looking for pluggable filters. @@ -150,8 +153,11 @@ public class Http11Processor extends Abs userDataHelper = new UserDataHelper(log); + httpParser = new HttpParser(protocol.getRelaxedPathChars(), + protocol.getRelaxedQueryChars()); + inputBuffer = new Http11InputBuffer(request, protocol.getMaxHttpHeaderSize(), - protocol.getRejectIllegalHeaderName()); + protocol.getRejectIllegalHeaderName(), httpParser); request.setInputBuffer(inputBuffer); outputBuffer = new Http11OutputBuffer(response, protocol.getMaxHttpHeaderSize()); @@ -754,7 +760,7 @@ public class Http11Processor extends Abs // Validate the characters in the URI. %nn decoding will be checked at // the point of decoding. for (int i = uriBC.getStart(); i < uriBC.getEnd(); i++) { - if (!HttpParser.isAbsolutePath(uriB[i])) { + if (!httpParser.isAbsolutePathRelaxed(uriB[i])) { response.setStatus(400); setErrorState(ErrorState.CLOSE_CLEAN, null); if (log.isDebugEnabled()) { Modified: tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java (original) +++ tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java Wed Apr 25 15:31:48 2018 @@ -36,21 +36,24 @@ import org.apache.tomcat.util.res.String */ public class HttpParser { + private static final StringManager sm = StringManager.getManager(HttpParser.class); + private static final int ARRAY_SIZE = 128; private static final boolean[] IS_CONTROL = new boolean[ARRAY_SIZE]; private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE]; private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE]; private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE]; - private static final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE]; private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE]; private static final boolean[] IS_ALPHA = new boolean[ARRAY_SIZE]; private static final boolean[] IS_NUMERIC = new boolean[ARRAY_SIZE]; private static final boolean[] IS_UNRESERVED = new boolean[ARRAY_SIZE]; private static final boolean[] IS_SUBDELIM = new boolean[ARRAY_SIZE]; private static final boolean[] IS_USERINFO = new boolean[ARRAY_SIZE]; - private static final boolean[] IS_ABSOLUTEPATH = new boolean[ARRAY_SIZE]; - private static final boolean[] IS_QUERY = new boolean[ARRAY_SIZE]; + private static final boolean[] IS_RELAXABLE = new boolean[ARRAY_SIZE]; + + private static final HttpParser DEFAULT; + static { for (int i = 0; i < ARRAY_SIZE; i++) { @@ -77,15 +80,6 @@ public class HttpParser { IS_HEX[i] = true; } - // Not valid for request target. - // Combination of multiple rules from RFC7230 and RFC 3986. Must be - // ASCII, no controls plus a few additional characters excluded - if (IS_CONTROL[i] || i > 127 || - i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' || - i == '^' || i == '`' || i == '{' || i == '|' || i == '}') { - IS_NOT_REQUEST_TARGET[i] = true; - } - // Not valid for HTTP protocol // "HTTP/" DIGIT "." DIGIT if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i >= '0' && i <= '9')) { @@ -104,8 +98,8 @@ public class HttpParser { IS_UNRESERVED[i] = true; } - if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i == ')' || i == '*' || i == '+' || - i == ',' || i == ';' || i == '=') { + if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i == ')' || i == '*' || + i == '+' || i == ',' || i == ';' || i == '=') { IS_SUBDELIM[i] = true; } @@ -114,6 +108,35 @@ public class HttpParser { IS_USERINFO[i] = true; } + // The characters that are normally not permitted for which the + // restrictions may be relaxed when used in the path and/or query + // string + if (i == '\"' || i == '<' || i == '>' || i == '[' || i == '\\' || i == ']' || + i == '^' || i == '`' || i == '{' || i == '|' || i == '}') { + IS_RELAXABLE[i] = true; + } + } + + DEFAULT = new HttpParser(null, null); + } + + + private final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE]; + private final boolean[] IS_ABSOLUTEPATH_RELAXED = new boolean[ARRAY_SIZE]; + private final boolean[] IS_QUERY_RELAXED = new boolean[ARRAY_SIZE]; + + + public HttpParser(String relaxedPathChars, String relaxedQueryChars) { + for (int i = 0; i < ARRAY_SIZE; i++) { + // Not valid for request target. + // Combination of multiple rules from RFC7230 and RFC 3986. Must be + // ASCII, no controls plus a few additional characters excluded + if (IS_CONTROL[i] || i > 127 || + i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' || + i == '^' || i == '`' || i == '{' || i == '|' || i == '}') { + IS_NOT_REQUEST_TARGET[i] = true; + } + /* * absolute-path = 1*( "/" segment ) * segment = *pchar @@ -122,7 +145,7 @@ public class HttpParser { * Note pchar allows everything userinfo allows plus "@" */ if (IS_USERINFO[i] || i == '@' || i == '/') { - IS_ABSOLUTEPATH[i] = true; + IS_ABSOLUTEPATH_RELAXED[i] = true; } /* @@ -130,14 +153,47 @@ public class HttpParser { * * Note query allows everything absolute-path allows plus "?" */ - if (IS_ABSOLUTEPATH[i] || i == '?') { - IS_QUERY[i] = true; + if (IS_ABSOLUTEPATH_RELAXED[i] || i == '?') { + IS_QUERY_RELAXED[i] = true; } } + + relax(IS_ABSOLUTEPATH_RELAXED, relaxedPathChars); + relax(IS_QUERY_RELAXED, relaxedQueryChars); } - private static final StringManager sm = StringManager.getManager(HttpParser.class); + public boolean isNotRequestTargetRelaxed(int c) { + // Fast for valid request target characters, slower for some incorrect + // ones + try { + return IS_NOT_REQUEST_TARGET[c]; + } catch (ArrayIndexOutOfBoundsException ex) { + return true; + } + } + + + public boolean isAbsolutePathRelaxed(int c) { + // Fast for valid user info characters, slower for some incorrect + // ones + try { + return IS_ABSOLUTEPATH_RELAXED[c]; + } catch (ArrayIndexOutOfBoundsException ex) { + return false; + } + } + + + public boolean isQueryRelaxed(int c) { + // Fast for valid user info characters, slower for some incorrect + // ones + try { + return IS_QUERY_RELAXED[c]; + } catch (ArrayIndexOutOfBoundsException ex) { + return false; + } + } public static String unquote(String input) { @@ -192,13 +248,7 @@ public class HttpParser { public static boolean isNotRequestTarget(int c) { - // Fast for valid request target characters, slower for some incorrect - // ones - try { - return IS_NOT_REQUEST_TARGET[c]; - } catch (ArrayIndexOutOfBoundsException ex) { - return true; - } + return DEFAULT.isNotRequestTargetRelaxed(c); } @@ -246,25 +296,24 @@ public class HttpParser { } - public static boolean isAbsolutePath(int c) { + private static boolean isRelaxable(int c) { // Fast for valid user info characters, slower for some incorrect // ones try { - return IS_ABSOLUTEPATH[c]; + return IS_RELAXABLE[c]; } catch (ArrayIndexOutOfBoundsException ex) { return false; } } + public static boolean isAbsolutePath(int c) { + return DEFAULT.isAbsolutePathRelaxed(c); + } + + public static boolean isQuery(int c) { - // Fast for valid user info characters, slower for some incorrect - // ones - try { - return IS_QUERY[c]; - } catch (ArrayIndexOutOfBoundsException ex) { - return false; - } + return DEFAULT.isQueryRelaxed(c); } @@ -773,12 +822,27 @@ public class HttpParser { } } + + private void relax(boolean[] flags, String relaxedChars) { + if (relaxedChars != null && relaxedChars.length() > 0) { + char[] chars = relaxedChars.toCharArray(); + for (char c : chars) { + if (isRelaxable(c)) { + flags[c] = true; + IS_NOT_REQUEST_TARGET[c] = false; + } + } + } + } + + private enum AllowsEnd { NEVER, FIRST, ALWAYS } + private enum DomainParseState { NEW( true, false, false, AllowsEnd.NEVER, AllowsEnd.NEVER, " at the start of"), ALL_ALPHA( true, true, true, AllowsEnd.ALWAYS, AllowsEnd.ALWAYS, " after a letter in"), Modified: tomcat/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/changelog.xml (original) +++ tomcat/trunk/webapps/docs/changelog.xml Wed Apr 25 15:31:48 2018 @@ -72,6 +72,12 @@ Update the internal fork of Apache Commons BCEL to r1829827 to add early access Java 11 support to the annotation scanning code. (markt) </add> + <add> + <bug>62273</bug>: Implement configuration options to work-around + specification non-compliant user agents (including all the major + browsers) that do not correctly %nn encode URI paths and query strings + as required by RFC 7230 and RFC 3986. (markt) + </add> <fix> <bug>62297</bug>: Enable the <code>CrawlerSessionManagerValve</code> to correctly handle bots that crawl multiple hosts and/or web applications Modified: tomcat/trunk/webapps/docs/config/http.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/http.xml?rev=1830087&r1=1830086&r2=1830087&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/config/http.xml (original) +++ tomcat/trunk/webapps/docs/config/http.xml Wed Apr 25 15:31:48 2018 @@ -558,6 +558,32 @@ <code>true</code> which will cause the request to be rejected.</p> </attribute> + <attribute name="relaxedPathChars" required="false"> + <p>The <a href="https://tools.ietf.org/rfc/rfc7230.txt">HTTP/1.1 + specification</a> requires that certain characters are %nn encoded when + used in URI paths. Unfortunately, many user agents including all the major + browsers are not compliant with this specification and use these + characters in unencoded form. To prevent Tomcat rejecting such requests, + this attribute may be used to specify the additional characters to allow. + If not specified, no addtional characters will be allowed. The value may + be any combination of the following characters: + <code>" < > [ \ ] ^ ` { | }</code> . Any other characters + present in the value will be ignored.</p> + </attribute> + + <attribute name="relaxedQueryChars" required="false"> + <p>The <a href="https://tools.ietf.org/rfc/rfc7230.txt">HTTP/1.1 + specification</a> requires that certain characters are %nn encoded when + used in URI query strings. Unfortunately, many user agents including all + the major browsers are not compliant with this specification and use these + characters in unencoded form. To prevent Tomcat rejecting such requests, + this attribute may be used to specify the additional characters to allow. + If not specified, no addtional characters will be allowed. The value may + be any combination of the following characters: + <code>" < > [ \ ] ^ ` { | }</code> . Any other characters + present in the value will be ignored.</p> + </attribute> + <attribute name="restrictedUserAgents" required="false"> <p>The value is a regular expression (using <code>java.util.regex</code>) matching the <code>user-agent</code> header of HTTP clients for which --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org