Author: markt Date: Wed Sep 3 17:47:42 2014 New Revision: 1622303 URL: http://svn.apache.org/r1622303 Log: Fix https://issues.apache.org/bugzilla/show_bug.cgi?id=56848 Improve handling of accept-language headers
Added: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java - copied, changed from r1618166, tomcat/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/http/parser/TestAcceptLanguage.java - copied unchanged from r1618166, tomcat/trunk/test/org/apache/tomcat/util/http/parser/TestAcceptLanguage.java Modified: tomcat/tc7.0.x/trunk/ (props changed) tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Request.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/MediaType.java tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestRequest.java tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TesterRequest.java tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Propchange: tomcat/tc7.0.x/trunk/ ------------------------------------------------------------------------------ Merged /tomcat/trunk:r1618166 Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Request.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Request.java?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Request.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Request.java Wed Sep 3 17:47:42 2014 @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.lang.reflect.InvocationTargetException; import java.nio.charset.Charset; @@ -74,7 +75,6 @@ import org.apache.catalina.core.Applicat import org.apache.catalina.core.AsyncContextImpl; import org.apache.catalina.realm.GenericPrincipal; import org.apache.catalina.util.ParameterMap; -import org.apache.catalina.util.StringParser; import org.apache.coyote.ActionCode; import org.apache.coyote.http11.upgrade.servlet31.HttpUpgradeHandler; import org.apache.juli.logging.Log; @@ -95,6 +95,7 @@ import org.apache.tomcat.util.http.fileu import org.apache.tomcat.util.http.fileupload.servlet.ServletFileUpload; import org.apache.tomcat.util.http.fileupload.servlet.ServletRequestContext; import org.apache.tomcat.util.http.mapper.MappingData; +import org.apache.tomcat.util.http.parser.AcceptLanguage; import org.apache.tomcat.util.res.StringManager; import org.ietf.jgss.GSSCredential; import org.ietf.jgss.GSSException; @@ -375,12 +376,6 @@ public class Request /** - * The string parser we will use for parsing request lines. - */ - private final StringParser parser = new StringParser(); - - - /** * Local port */ protected int localPort = -1; @@ -3254,99 +3249,24 @@ public class Request */ protected void parseLocalesHeader(String value, TreeMap<Double, ArrayList<Locale>> locales) { - // Preprocess the value to remove all whitespace - int white = value.indexOf(' '); - if (white < 0) { - white = value.indexOf('\t'); - } - if (white >= 0) { - StringBuilder sb = new StringBuilder(); - int len = value.length(); - for (int i = 0; i < len; i++) { - char ch = value.charAt(i); - if ((ch != ' ') && (ch != '\t')) { - sb.append(ch); - } - } - parser.setString(sb.toString()); - } else { - parser.setString(value); + List<AcceptLanguage> acceptLanguages; + try { + acceptLanguages = AcceptLanguage.parse(new StringReader(value)); + } catch (IOException e) { + // Mal-formed headers are ignore. Do the same in the unlikely event + // of an IOException. + return; } - // Process each comma-delimited language specification - int length = parser.getLength(); - while (true) { - - // Extract the next comma-delimited entry - int start = parser.getIndex(); - if (start >= length) { - break; - } - int end = parser.findChar(','); - String entry = parser.extract(start, end).trim(); - parser.advance(); // For the following entry - - // Extract the quality factor for this entry - double quality = 1.0; - int semi = entry.indexOf(";q="); - if (semi >= 0) { - try { - String strQuality = entry.substring(semi + 3); - if (strQuality.length() <= 5) { - quality = Double.parseDouble(strQuality); - } else { - quality = 0.0; - } - } catch (NumberFormatException e) { - quality = 0.0; - } - entry = entry.substring(0, semi); - } - - // Skip entries we are not going to keep track of - if (quality < 0.00005) - { - continue; // Zero (or effectively zero) quality factors - } - if ("*".equals(entry)) - { - continue; // FIXME - "*" entries are not handled - } - - // Extract the language and country for this entry - String language = null; - String country = null; - String variant = null; - int dash = entry.indexOf('-'); - if (dash < 0) { - language = entry; - country = ""; - variant = ""; - } else { - language = entry.substring(0, dash); - country = entry.substring(dash + 1); - int vDash = country.indexOf('-'); - if (vDash > 0) { - String cTemp = country.substring(0, vDash); - variant = country.substring(vDash + 1); - country = cTemp; - } else { - variant = ""; - } - } - if (!isAlpha(language) || !isAlpha(country) || !isAlpha(variant)) { - continue; - } - + for (AcceptLanguage acceptLanguage : acceptLanguages) { // Add a new Locale to the list of Locales for this quality level - Locale locale = new Locale(language, country, variant); - Double key = new Double(-quality); // Reverse the order + Double key = new Double(-acceptLanguage.getQuality()); // Reverse the order ArrayList<Locale> values = locales.get(key); if (values == null) { values = new ArrayList<Locale>(); locales.put(key, values); } - values.add(locale); + values.add(acceptLanguage.getLocale()); } } Copied: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java (from r1618166, tomcat/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java) URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java?p2=tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java&p1=tomcat/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java&r1=1618166&r2=1622303&rev=1622303&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/AcceptLanguage.java Wed Sep 3 17:47:42 2014 @@ -43,7 +43,7 @@ public class AcceptLanguage { public static List<AcceptLanguage> parse(StringReader input) throws IOException { - List<AcceptLanguage> result = new ArrayList<>(); + List<AcceptLanguage> result = new ArrayList<AcceptLanguage>(); do { // Token is broader than what is permitted in a language tag Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/HttpParser.java Wed Sep 3 17:47:42 2014 @@ -119,7 +119,7 @@ public class HttpParser { Map<String,String> result = new HashMap<String,String>(); - if (skipConstant(input, "Digest") != SkipConstantResult.FOUND) { + if (skipConstant(input, "Digest") != SkipResult.FOUND) { return null; } // All field names are valid tokens @@ -128,7 +128,7 @@ public class HttpParser { return null; } while (!field.equals("")) { - if (skipConstant(input, "=") != SkipConstantResult.FOUND) { + if (skipConstant(input, "=") != SkipResult.FOUND) { return null; } String value = null; @@ -169,7 +169,7 @@ public class HttpParser { } result.put(field, value); - if (skipConstant(input, ",") == SkipConstantResult.NOT_FOUND) { + if (skipConstant(input, ",") == SkipResult.NOT_FOUND) { return null; } field = readToken(input); @@ -190,7 +190,7 @@ public class HttpParser { return null; } - if (skipConstant(input, "/") == SkipConstantResult.NOT_FOUND) { + if (skipConstant(input, "/") == SkipResult.NOT_FOUND) { return null; } @@ -203,15 +203,15 @@ public class HttpParser { LinkedHashMap<String,String> parameters = new LinkedHashMap<String,String>(); - SkipConstantResult lookForSemiColon = skipConstant(input, ";"); - if (lookForSemiColon == SkipConstantResult.NOT_FOUND) { + SkipResult lookForSemiColon = skipConstant(input, ";"); + if (lookForSemiColon == SkipResult.NOT_FOUND) { return null; } - while (lookForSemiColon == SkipConstantResult.FOUND) { + while (lookForSemiColon == SkipResult.FOUND) { String attribute = readToken(input); String value = ""; - if (skipConstant(input, "=") == SkipConstantResult.FOUND) { + if (skipConstant(input, "=") == SkipResult.FOUND) { value = readTokenOrQuotedString(input, true); } @@ -220,7 +220,7 @@ public class HttpParser { } lookForSemiColon = skipConstant(input, ";"); - if (lookForSemiColon == SkipConstantResult.NOT_FOUND) { + if (lookForSemiColon == SkipResult.NOT_FOUND) { return null; } } @@ -286,25 +286,24 @@ public class HttpParser { return c; } - private static SkipConstantResult skipConstant(StringReader input, - String constant) throws IOException { + static SkipResult skipConstant(StringReader input, String constant) throws IOException { int len = constant.length(); int c = skipLws(input, false); for (int i = 0; i < len; i++) { if (i == 0 && c == -1) { - return SkipConstantResult.EOF; + return SkipResult.EOF; } if (c != constant.charAt(i)) { input.skip(-(i + 1)); - return SkipConstantResult.NOT_FOUND; + return SkipResult.NOT_FOUND; } if (i != (len - 1)) { c = input.read(); } } - return SkipConstantResult.FOUND; + return SkipResult.FOUND; } /** @@ -312,7 +311,7 @@ public class HttpParser { * available to read or <code>null</code> if data other than a * token was found */ - private static String readToken(StringReader input) throws IOException { + static String readToken(StringReader input) throws IOException { StringBuilder result = new StringBuilder(); int c = skipLws(input, false); @@ -372,7 +371,7 @@ public class HttpParser { return result.toString(); } - private static String readTokenOrQuotedString(StringReader input, + static String readTokenOrQuotedString(StringReader input, boolean returnQuoted) throws IOException { // Go back so first non-LWS character is available to be read again @@ -493,7 +492,85 @@ public class HttpParser { } } - private static enum SkipConstantResult { + static double readWeight(StringReader input, char delimiter) throws IOException { + int c = skipLws(input, false); + if (c == -1 || c == delimiter) { + // No q value just whitespace + return 1; + } else if (c != 'q') { + // Malformed. Use quality of zero so it is dropped. + skipUntil(input, c, delimiter); + return 0; + } + // RFC 7231 does not allow whitespace here but be tolerant + c = skipLws(input, false); + if (c != '=') { + // Malformed. Use quality of zero so it is dropped. + skipUntil(input, c, delimiter); + return 0; + } + + // RFC 7231 does not allow whitespace here but be tolerant + c = skipLws(input, false); + + // Should be no more than 3 decimal places + StringBuilder value = new StringBuilder(5); + int decimalPlacesRead = 0; + if (c == '0' || c == '1') { + value.append((char) c); + c = input.read(); + if (c == '.') { + value.append('.'); + } else if (c < '0' || c > '9') { + decimalPlacesRead = 3; + } + while (true) { + c = input.read(); + if (c >= '0' && c <= '9') { + if (decimalPlacesRead < 3) { + value.append((char) c); + decimalPlacesRead++; + } + } else if (c == delimiter || c == 9 || c == 32 || c == -1) { + break; + } else { + // Go back so character is available for next read + input.skip(-1); + return 0; + } + } + } else { + // Malformed. Use quality of zero so it is dropped and skip until + // EOF or the next delimiter + skipUntil(input, c, delimiter); + return 0; + } + + double result = Double.parseDouble(value.toString()); + if (result > 1) { + return 0; + } + return result; + } + + + /** + * Skips all characters until EOF or the specified target is found. Normally + * used to skip invalid input until the next separator. + */ + static SkipResult skipUntil(StringReader input, int c, char target) throws IOException { + while (c != -1 && c != target) { + c = input.read(); + } + if (c == -1) { + return SkipResult.EOF; + } else { + return SkipResult.FOUND; + } + } + + + static enum SkipResult { FOUND, NOT_FOUND, EOF Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/MediaType.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/MediaType.java?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/MediaType.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/http/parser/MediaType.java Wed Sep 3 17:47:42 2014 @@ -16,6 +16,8 @@ */ package org.apache.tomcat.util.http.parser; +import java.io.IOException; +import java.io.StringReader; import java.util.LinkedHashMap; import java.util.Locale; import java.util.Map; @@ -122,4 +124,56 @@ public class MediaType { } return noCharset; } + + /** + * Parses a MediaType value, either from a HTTP header or from an application. + * + * @param input a reader over the header text + * @return a MediaType parsed from the input, or null if not valid + * @throws IOException if there was a problem reading the input + */ + public static MediaType parseMediaType(StringReader input) throws IOException { + + // Type (required) + String type = HttpParser.readToken(input); + if (type == null || type.length() == 0) { + return null; + } + + if (HttpParser.skipConstant(input, "/") == HttpParser.SkipResult.NOT_FOUND) { + return null; + } + + // Subtype (required) + String subtype = HttpParser.readToken(input); + if (subtype == null || subtype.length() == 0) { + return null; + } + + LinkedHashMap<String,String> parameters = new LinkedHashMap<String, String>(); + + HttpParser.SkipResult lookForSemiColon = HttpParser.skipConstant(input, ";"); + if (lookForSemiColon == HttpParser.SkipResult.NOT_FOUND) { + return null; + } + while (lookForSemiColon == HttpParser.SkipResult.FOUND) { + String attribute = HttpParser.readToken(input); + + String value = ""; + if (HttpParser.skipConstant(input, "=") == HttpParser.SkipResult.FOUND) { + value = HttpParser.readTokenOrQuotedString(input, true); + } + + if (attribute != null) { + parameters.put(attribute.toLowerCase(Locale.ENGLISH), value); + } + + lookForSemiColon = HttpParser.skipConstant(input, ";"); + if (lookForSemiColon == HttpParser.SkipResult.NOT_FOUND) { + return null; + } + } + + return new MediaType(type, subtype, parameters); + } } Modified: tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestRequest.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestRequest.java?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestRequest.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TestRequest.java Wed Sep 3 17:47:42 2014 @@ -42,6 +42,7 @@ import static org.junit.Assert.assertTru import static org.junit.Assert.fail; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import org.apache.catalina.Context; @@ -840,4 +841,24 @@ public class TestRequest extends TomcatB Assert.assertEquals(expected, actual); } + + @Test + @Ignore("Used to check performance of different parsing approaches") + public void localeParsePerformance() throws Exception { + TesterRequest req = new TesterRequest(); + req.addHeader("accept-encoding", "en-gb,en"); + + long start = System.nanoTime(); + + // Takes about 0.3s on a quad core 2.7Ghz 2013 MacBook + for (int i = 0; i < 10000000; i++) { + req.parseLocales(); + req.localesParsed = false; + req.locales.clear(); + } + + long time = System.nanoTime() - start; + + System.out.println(time); + } } Modified: tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TesterRequest.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TesterRequest.java?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TesterRequest.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/connector/TesterRequest.java Wed Sep 3 17:47:42 2014 @@ -72,6 +72,10 @@ public class TesterRequest extends Reque } @Override public Enumeration<String> getHeaders(String name) { + List<String> values = headers.get(name); + if (values == null || values.size() == 0) { + return Collections.emptyEnumeration(); + } return Collections.enumeration(headers.get(name)); } Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1622303&r1=1622302&r2=1622303&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original) +++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Wed Sep 3 17:47:42 2014 @@ -141,6 +141,10 @@ than just using the first header to determine the user's preferred Locale. (markt) </fix> + <fix> + <bug>56848</bug>: Improve handling of <code>accept-language</code> + headers. (markt) + </fix> </changelog> </subsection> <subsection name="Coyote"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org