Author: markt
Date: Wed Apr 25 15:31:48 2018
New Revision: 1830087

URL: http://svn.apache.org/viewvc?rev=1830087&view=rev
Log:
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=62273
Implement configuration options to work-around specification non-compliant user 
agents (including all the major browsers) that do not correctly %nn encode URI 
paths and query strings as required by RFC 7230 and RFC 3986.

Modified:
    tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java
    tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java
    tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java
    tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java
    tomcat/trunk/webapps/docs/changelog.xml
    tomcat/trunk/webapps/docs/config/http.xml

Modified: tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java 
(original)
+++ tomcat/trunk/java/org/apache/coyote/http11/AbstractHttp11Protocol.java Wed 
Apr 25 15:31:48 2018
@@ -95,6 +95,24 @@ public abstract class AbstractHttp11Prot
     // ------------------------------------------------ HTTP specific 
properties
     // ------------------------------------------ managed in the 
ProtocolHandler
 
+    private String relaxedPathChars = null;
+    public String getRelaxedPathChars() {
+        return relaxedPathChars;
+    }
+    public void setRelaxedPathChars(String relaxedPathChars) {
+        this.relaxedPathChars = relaxedPathChars;
+    }
+
+
+    private String relaxedQueryChars = null;
+    public String getRelaxedQueryChars() {
+        return relaxedQueryChars;
+    }
+    public void setRelaxedQueryChars(String relaxedQueryChars) {
+        this.relaxedQueryChars = relaxedQueryChars;
+    }
+
+
     private boolean allowHostHeaderMismatch = false;
     /**
      * Will Tomcat accept an HTTP 1.1 request where the host header does not

Modified: tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java (original)
+++ tomcat/trunk/java/org/apache/coyote/http11/Http11InputBuffer.java Wed Apr 
25 15:31:48 2018
@@ -133,6 +133,7 @@ public class Http11InputBuffer implement
     private int parsingRequestLineQPos = -1;
     private HeaderParsePosition headerParsePos;
     private final HeaderParseData headerData = new HeaderParseData();
+    private final HttpParser httpParser;
 
     /**
      * Maximum allowed size of the HTTP request line plus headers plus any
@@ -149,13 +150,14 @@ public class Http11InputBuffer implement
     // ----------------------------------------------------------- Constructors
 
     public Http11InputBuffer(Request request, int headerBufferSize,
-            boolean rejectIllegalHeaderName) {
+            boolean rejectIllegalHeaderName, HttpParser httpParser) {
 
         this.request = request;
         headers = request.getMimeHeaders();
 
         this.headerBufferSize = headerBufferSize;
         this.rejectIllegalHeaderName = rejectIllegalHeaderName;
+        this.httpParser = httpParser;
 
         filterLibrary = new InputFilter[0];
         activeFilters = new InputFilter[0];
@@ -456,10 +458,10 @@ public class Http11InputBuffer implement
                     end = pos;
                 } else if (chr == Constants.QUESTION && parsingRequestLineQPos 
== -1) {
                     parsingRequestLineQPos = pos;
-                } else if (parsingRequestLineQPos != -1 && 
!HttpParser.isQuery(chr)) {
+                } else if (parsingRequestLineQPos != -1 && 
!httpParser.isQueryRelaxed(chr)) {
                     // %nn decoding will be checked at the point of decoding
                     throw new 
IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
-                } else if (HttpParser.isNotRequestTarget(chr)) {
+                } else if (httpParser.isNotRequestTargetRelaxed(chr)) {
                     // This is a general check that aims to catch problems 
early
                     // Detailed checking of each part of the request target 
will
                     // happen in Http11Processor#prepareRequest()

Modified: tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java (original)
+++ tomcat/trunk/java/org/apache/coyote/http11/Http11Processor.java Wed Apr 25 
15:31:48 2018
@@ -87,6 +87,9 @@ public class Http11Processor extends Abs
     private final Http11OutputBuffer outputBuffer;
 
 
+    private final HttpParser httpParser;
+
+
     /**
      * Tracks how many internal filters are in the filter library so they
      * are skipped when looking for pluggable filters.
@@ -150,8 +153,11 @@ public class Http11Processor extends Abs
 
         userDataHelper = new UserDataHelper(log);
 
+        httpParser = new HttpParser(protocol.getRelaxedPathChars(),
+                protocol.getRelaxedQueryChars());
+
         inputBuffer = new Http11InputBuffer(request, 
protocol.getMaxHttpHeaderSize(),
-                protocol.getRejectIllegalHeaderName());
+                protocol.getRejectIllegalHeaderName(), httpParser);
         request.setInputBuffer(inputBuffer);
 
         outputBuffer = new Http11OutputBuffer(response, 
protocol.getMaxHttpHeaderSize());
@@ -754,7 +760,7 @@ public class Http11Processor extends Abs
         // Validate the characters in the URI. %nn decoding will be checked at
         // the point of decoding.
         for (int i = uriBC.getStart(); i < uriBC.getEnd(); i++) {
-            if (!HttpParser.isAbsolutePath(uriB[i])) {
+            if (!httpParser.isAbsolutePathRelaxed(uriB[i])) {
                 response.setStatus(400);
                 setErrorState(ErrorState.CLOSE_CLEAN, null);
                 if (log.isDebugEnabled()) {

Modified: tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java 
(original)
+++ tomcat/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java Wed 
Apr 25 15:31:48 2018
@@ -36,21 +36,24 @@ import org.apache.tomcat.util.res.String
  */
 public class HttpParser {
 
+    private static final StringManager sm = 
StringManager.getManager(HttpParser.class);
+
     private static final int ARRAY_SIZE = 128;
 
     private static final boolean[] IS_CONTROL = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];
-    private static final boolean[] IS_NOT_REQUEST_TARGET = new 
boolean[ARRAY_SIZE];
     private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_ALPHA = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_NUMERIC = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_UNRESERVED = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_SUBDELIM = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_USERINFO = new boolean[ARRAY_SIZE];
-    private static final boolean[] IS_ABSOLUTEPATH = new boolean[ARRAY_SIZE];
-    private static final boolean[] IS_QUERY = new boolean[ARRAY_SIZE];
+    private static final boolean[] IS_RELAXABLE = new boolean[ARRAY_SIZE];
+
+    private static final HttpParser DEFAULT;
+
 
     static {
         for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -77,15 +80,6 @@ public class HttpParser {
                 IS_HEX[i] = true;
             }
 
-            // Not valid for request target.
-            // Combination of multiple rules from RFC7230 and RFC 3986. Must be
-            // ASCII, no controls plus a few additional characters excluded
-            if (IS_CONTROL[i] || i > 127 ||
-                    i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' 
|| i == '\\' ||
-                    i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') 
{
-                IS_NOT_REQUEST_TARGET[i] = true;
-            }
-
             // Not valid for HTTP protocol
             // "HTTP/" DIGIT "." DIGIT
             if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i 
>= '0' && i <= '9')) {
@@ -104,8 +98,8 @@ public class HttpParser {
                 IS_UNRESERVED[i] = true;
             }
 
-            if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i 
== ')' || i == '*' || i == '+' ||
-                    i == ',' || i == ';' || i == '=') {
+            if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i 
== ')' || i == '*' ||
+                    i == '+' || i == ',' || i == ';' || i == '=') {
                 IS_SUBDELIM[i] = true;
             }
 
@@ -114,6 +108,35 @@ public class HttpParser {
                 IS_USERINFO[i] = true;
             }
 
+            // The characters that are normally not permitted for which the
+            // restrictions may be relaxed when used in the path and/or query
+            // string
+            if (i == '\"' || i == '<' || i == '>' || i == '[' || i == '\\' || 
i == ']' ||
+                    i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') 
{
+                IS_RELAXABLE[i] = true;
+            }
+        }
+
+        DEFAULT = new HttpParser(null, null);
+    }
+
+
+    private final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE];
+    private final boolean[] IS_ABSOLUTEPATH_RELAXED = new boolean[ARRAY_SIZE];
+    private final boolean[] IS_QUERY_RELAXED = new boolean[ARRAY_SIZE];
+
+
+    public HttpParser(String relaxedPathChars, String relaxedQueryChars) {
+        for (int i = 0; i < ARRAY_SIZE; i++) {
+            // Not valid for request target.
+            // Combination of multiple rules from RFC7230 and RFC 3986. Must be
+            // ASCII, no controls plus a few additional characters excluded
+            if (IS_CONTROL[i] || i > 127 ||
+                    i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' 
|| i == '\\' ||
+                    i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') 
{
+                IS_NOT_REQUEST_TARGET[i] = true;
+            }
+
             /*
              * absolute-path  = 1*( "/" segment )
              * segment        = *pchar
@@ -122,7 +145,7 @@ public class HttpParser {
              * Note pchar allows everything userinfo allows plus "@"
              */
             if (IS_USERINFO[i] || i == '@' || i == '/') {
-                IS_ABSOLUTEPATH[i] = true;
+                IS_ABSOLUTEPATH_RELAXED[i] = true;
             }
 
             /*
@@ -130,14 +153,47 @@ public class HttpParser {
              *
              * Note query allows everything absolute-path allows plus "?"
              */
-            if (IS_ABSOLUTEPATH[i] || i == '?') {
-                IS_QUERY[i] = true;
+            if (IS_ABSOLUTEPATH_RELAXED[i] || i == '?') {
+                IS_QUERY_RELAXED[i] = true;
             }
         }
+
+        relax(IS_ABSOLUTEPATH_RELAXED, relaxedPathChars);
+        relax(IS_QUERY_RELAXED, relaxedQueryChars);
     }
 
 
-    private static final StringManager sm = 
StringManager.getManager(HttpParser.class);
+    public boolean isNotRequestTargetRelaxed(int c) {
+        // Fast for valid request target characters, slower for some incorrect
+        // ones
+        try {
+            return IS_NOT_REQUEST_TARGET[c];
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            return true;
+        }
+    }
+
+
+    public boolean isAbsolutePathRelaxed(int c) {
+        // Fast for valid user info characters, slower for some incorrect
+        // ones
+        try {
+            return IS_ABSOLUTEPATH_RELAXED[c];
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            return false;
+        }
+    }
+
+
+    public boolean isQueryRelaxed(int c) {
+        // Fast for valid user info characters, slower for some incorrect
+        // ones
+        try {
+            return IS_QUERY_RELAXED[c];
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            return false;
+        }
+    }
 
 
     public static String unquote(String input) {
@@ -192,13 +248,7 @@ public class HttpParser {
 
 
     public static boolean isNotRequestTarget(int c) {
-        // Fast for valid request target characters, slower for some incorrect
-        // ones
-        try {
-            return IS_NOT_REQUEST_TARGET[c];
-        } catch (ArrayIndexOutOfBoundsException ex) {
-            return true;
-        }
+        return DEFAULT.isNotRequestTargetRelaxed(c);
     }
 
 
@@ -246,25 +296,24 @@ public class HttpParser {
     }
 
 
-    public static boolean isAbsolutePath(int c) {
+    private static boolean isRelaxable(int c) {
         // Fast for valid user info characters, slower for some incorrect
         // ones
         try {
-            return IS_ABSOLUTEPATH[c];
+            return IS_RELAXABLE[c];
         } catch (ArrayIndexOutOfBoundsException ex) {
             return false;
         }
     }
 
 
+    public static boolean isAbsolutePath(int c) {
+        return DEFAULT.isAbsolutePathRelaxed(c);
+    }
+
+
     public static boolean isQuery(int c) {
-        // Fast for valid user info characters, slower for some incorrect
-        // ones
-        try {
-            return IS_QUERY[c];
-        } catch (ArrayIndexOutOfBoundsException ex) {
-            return false;
-        }
+        return DEFAULT.isQueryRelaxed(c);
     }
 
 
@@ -773,12 +822,27 @@ public class HttpParser {
         }
     }
 
+
+    private void relax(boolean[] flags, String relaxedChars) {
+        if (relaxedChars != null && relaxedChars.length() > 0) {
+            char[] chars = relaxedChars.toCharArray();
+            for (char c : chars) {
+                if (isRelaxable(c)) {
+                    flags[c] = true;
+                    IS_NOT_REQUEST_TARGET[c] = false;
+                }
+            }
+        }
+    }
+
+
     private enum AllowsEnd {
         NEVER,
         FIRST,
         ALWAYS
     }
 
+
     private enum DomainParseState {
         NEW(       true, false, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " 
at the start of"),
         ALL_ALPHA( true,  true,  true, AllowsEnd.ALWAYS, AllowsEnd.ALWAYS, " 
after a letter in"),

Modified: tomcat/trunk/webapps/docs/changelog.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Wed Apr 25 15:31:48 2018
@@ -72,6 +72,12 @@
         Update the internal fork of Apache Commons BCEL to r1829827 to add 
early
         access Java 11 support to the annotation scanning code. (markt)
       </add>
+      <add>
+        <bug>62273</bug>: Implement configuration options to work-around
+        specification non-compliant user agents (including all the major
+        browsers) that do not correctly %nn encode URI paths and query strings
+        as required by RFC 7230 and RFC 3986. (markt)
+      </add>
       <fix>
         <bug>62297</bug>: Enable the <code>CrawlerSessionManagerValve</code> to
         correctly handle bots that crawl multiple hosts and/or web applications

Modified: tomcat/trunk/webapps/docs/config/http.xml
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/http.xml?rev=1830087&r1=1830086&r2=1830087&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/config/http.xml (original)
+++ tomcat/trunk/webapps/docs/config/http.xml Wed Apr 25 15:31:48 2018
@@ -558,6 +558,32 @@
       <code>true</code> which will cause the request to be rejected.</p>
     </attribute>
 
+    <attribute name="relaxedPathChars" required="false">
+      <p>The <a href="https://tools.ietf.org/rfc/rfc7230.txt";>HTTP/1.1
+      specification</a> requires that certain characters are %nn encoded when
+      used in URI paths. Unfortunately, many user agents including all the 
major
+      browsers are not compliant with this specification and use these
+      characters in unencoded form. To prevent Tomcat rejecting such requests,
+      this attribute may be used to specify the additional characters to allow.
+      If not specified, no addtional characters will be allowed. The value may
+      be any combination of the following characters:
+      <code>&quot; &lt; &gt; [ \ ] ^ ` { | }</code> . Any other characters
+      present in the value will be ignored.</p>
+    </attribute>
+
+    <attribute name="relaxedQueryChars" required="false">
+      <p>The <a href="https://tools.ietf.org/rfc/rfc7230.txt";>HTTP/1.1
+      specification</a> requires that certain characters are %nn encoded when
+      used in URI query strings. Unfortunately, many user agents including all
+      the major browsers are not compliant with this specification and use 
these
+      characters in unencoded form. To prevent Tomcat rejecting such requests,
+      this attribute may be used to specify the additional characters to allow.
+      If not specified, no addtional characters will be allowed. The value may
+      be any combination of the following characters:
+      <code>&quot; &lt; &gt; [ \ ] ^ ` { | }</code> . Any other characters
+      present in the value will be ignored.</p>
+    </attribute>
+
     <attribute name="restrictedUserAgents" required="false">
       <p>The value is a regular expression (using <code>java.util.regex</code>)
       matching the <code>user-agent</code> header of HTTP clients for which



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to