This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/master by this push: new ea00bb5 Fix #418 - add "pass-through" as an option for useBomIfPresent ea00bb5 is described below commit ea00bb55348faad97db3f5f70de0efad036fec17 Author: Mark Thomas <ma...@apache.org> AuthorDate: Wed May 19 12:17:17 2021 +0100 Fix #418 - add "pass-through" as an option for useBomIfPresent Based on a PR by Jean-Louis Monteiro Also adds support for the <pr> element in the change log --- .../apache/catalina/servlets/DefaultServlet.java | 84 +++++++++++++++++----- .../catalina/servlets/LocalStrings.properties | 1 + .../servlets/DefaultServletEncodingBaseTest.java | 28 +++++--- ... TestDefaultServletEncodingPassThroughBom.java} | 7 +- .../TestDefaultServletEncodingWithBom.java | 5 +- .../TestDefaultServletEncodingWithoutBom.java | 5 +- webapps/docs/changelog.xml | 9 +++ webapps/docs/default-servlet.xml | 6 +- webapps/docs/tomcat-docs.xsl | 7 ++ 9 files changed, 117 insertions(+), 35 deletions(-) diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java b/java/org/apache/catalina/servlets/DefaultServlet.java index 05ceabd..cff8f88 100644 --- a/java/org/apache/catalina/servlets/DefaultServlet.java +++ b/java/org/apache/catalina/servlets/DefaultServlet.java @@ -246,8 +246,9 @@ public class DefaultServlet extends HttpServlet { /** * If a file has a BOM, should that be used in preference to fileEncoding? + * Will default to {@link BomConfig#TRUE} in {@link #init()}. */ - private boolean useBomIfPresent = true; + private BomConfig useBomIfPresent = null; /** * Minimum size for sendfile usage in bytes. @@ -335,8 +336,23 @@ public class DefaultServlet extends HttpServlet { } } - if (getServletConfig().getInitParameter("useBomIfPresent") != null) { - useBomIfPresent = Boolean.parseBoolean(getServletConfig().getInitParameter("useBomIfPresent")); + String useBomIfPresent = getServletConfig().getInitParameter("useBomIfPresent"); + if (useBomIfPresent == null) { + // Use default + this.useBomIfPresent = BomConfig.TRUE; + } else { + for (BomConfig bomConfig : BomConfig.values()) { + if (bomConfig.configurationValue.equalsIgnoreCase(useBomIfPresent)) { + this.useBomIfPresent = bomConfig; + break; + } + } + if (this.useBomIfPresent == null) { + // Unrecognised configuration value + IllegalArgumentException iae = new IllegalArgumentException( + sm.getString("defaultServlet.unknownBomConfig", useBomIfPresent)); + throw new ServletException(iae); + } } globalXsltFile = getServletConfig().getInitParameter("globalXsltFile"); @@ -1083,8 +1099,8 @@ public class DefaultServlet extends HttpServlet { if (!renderResult.markSupported()) { renderResult = new BufferedInputStream(renderResult); } - Charset bomCharset = processBom(renderResult); - if (bomCharset != null && useBomIfPresent) { + Charset bomCharset = processBom(renderResult, useBomIfPresent.stripBom); + if (bomCharset != null && useBomIfPresent.useBomEncoding) { inputEncoding = bomCharset.name(); } } @@ -1105,8 +1121,8 @@ public class DefaultServlet extends HttpServlet { if (!source.markSupported()) { source = new BufferedInputStream(source); } - Charset bomCharset = processBom(source); - if (bomCharset != null && useBomIfPresent) { + Charset bomCharset = processBom(source, useBomIfPresent.stripBom); + if (bomCharset != null && useBomIfPresent.useBomEncoding) { inputEncoding = bomCharset.name(); } // Following test also ensures included resources @@ -1217,7 +1233,7 @@ public class DefaultServlet extends HttpServlet { /* * Code borrowed heavily from Jasper's EncodingDetector */ - private static Charset processBom(InputStream is) throws IOException { + private static Charset processBom(InputStream is, boolean stripBom) throws IOException { // Java supported character sets do not use BOMs longer than 4 bytes byte[] bom = new byte[4]; is.mark(bom.length); @@ -1226,7 +1242,7 @@ public class DefaultServlet extends HttpServlet { // BOMs are at least 2 bytes if (count < 2) { - skip(is, 0); + skip(is, 0, stripBom); return null; } @@ -1234,31 +1250,31 @@ public class DefaultServlet extends HttpServlet { int b0 = bom[0] & 0xFF; int b1 = bom[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { - skip(is, 2); + skip(is, 2, stripBom); return StandardCharsets.UTF_16BE; } // Delay the UTF_16LE check if there are more that 2 bytes since it // overlaps with UTF-32LE. if (count == 2 && b0 == 0xFF && b1 == 0xFE) { - skip(is, 2); + skip(is, 2, stripBom); return StandardCharsets.UTF_16LE; } // Remaining BOMs are at least 3 bytes if (count < 3) { - skip(is, 0); + skip(is, 0, stripBom); return null; } // UTF-8 is only 3-byte BOM int b2 = bom[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { - skip(is, 3); + skip(is, 3, stripBom); return StandardCharsets.UTF_8; } if (count < 4) { - skip(is, 0); + skip(is, 0, stripBom); return null; } @@ -1275,19 +1291,21 @@ public class DefaultServlet extends HttpServlet { // won't see a UTF16-LE file with a BOM where the first real data is // 0x00 0x00 if (b0 == 0xFF && b1 == 0xFE) { - skip(is, 2); + skip(is, 2, stripBom); return StandardCharsets.UTF_16LE; } - skip(is, 0); + skip(is, 0, stripBom); return null; } - private static void skip(InputStream is, int skip) throws IOException { + private static void skip(InputStream is, int skip, boolean stripBom) throws IOException { is.reset(); - while (skip-- > 0) { - is.read(); + if (stripBom) { + while (skip-- > 0) { + is.read(); + } } } @@ -2941,4 +2959,32 @@ public class DefaultServlet extends HttpServlet { return c; } } + + static enum BomConfig { + /** + * BoM is stripped if present and any BoM found used to determine the + * encoding used to read the resource. + */ + TRUE("true", true, true), + /** + * BoM is stripped if present but the configured file encoding is used + * to read the resource. + */ + FALSE("false", true, false), + /** + * BoM is not stripped and the configured file encoding is used to read + * the resource. + */ + PASS_THROUGH("pass-through", false, false); + + final String configurationValue; + final boolean stripBom; + final boolean useBomEncoding; + + private BomConfig(String configurationValue, boolean stripBom, boolean useBomEncoding) { + this.configurationValue = configurationValue; + this.stripBom = stripBom; + this.useBomEncoding = useBomEncoding; + } + } } diff --git a/java/org/apache/catalina/servlets/LocalStrings.properties b/java/org/apache/catalina/servlets/LocalStrings.properties index 226a3ae..1ce4990 100644 --- a/java/org/apache/catalina/servlets/LocalStrings.properties +++ b/java/org/apache/catalina/servlets/LocalStrings.properties @@ -42,6 +42,7 @@ defaultServlet.missingResource=The requested resource [{0}] is not available defaultServlet.noResources=No static resources were found defaultServlet.readerCloseFailed=Failed to close reader defaultServlet.skipfail=Read failed because only [{0}] bytes were available but needed to skip [{1}] bytes to reach the start of the requested range +defaultServlet.unknownBomConfig=Unrecognised value of [{0}] provided for useBomIfPresent initialization parameter defaultServlet.xslError=XSL transformer error directory.filename=Filename diff --git a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java index caeb286..2143fac 100644 --- a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java +++ b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java @@ -40,6 +40,7 @@ import org.junit.runners.Parameterized.Parameter; import org.apache.catalina.Context; import org.apache.catalina.Wrapper; +import org.apache.catalina.servlets.DefaultServlet.BomConfig; import org.apache.catalina.startup.Tomcat; import org.apache.catalina.startup.TomcatBaseTest; import org.apache.tomcat.util.buf.B2CConverter; @@ -109,16 +110,27 @@ public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest { } - private static boolean getExpected(String fileEncoding, boolean useBom, String targetFile, + private static boolean getExpected(String fileEncoding, BomConfig bomConfig, String targetFile, String outputEncoding, boolean callSetCharacterEncoding, boolean useWriter) { - if (useWriter || callSetCharacterEncoding) { + if (targetFile.endsWith("-bom") && !bomConfig.stripBom) { + /* + * If the target file contains a BOM and the BOM is not stripped the + * test should always fail because BOM will be kept untouched + * and may result in a different character in the output + * + * We could differentiate iso-8859-1 and cp1252 but we would need logic here and in + * the asserts and that would make the logic in the test as complex as the code + * under test. + */ + return false; + } else if (useWriter || callSetCharacterEncoding) { /* * Using a writer or setting the output character encoding means the * response will specify a character set. These cases therefore * reduce to can the file be read with the correct encoding. * (Assuming any BOM is always skipped in the included output.) */ - if (targetFile.endsWith("-bom") && useBom || + if (targetFile.endsWith("-bom") && bomConfig.useBomEncoding || targetFile.startsWith(fileEncoding) || targetFile.equals("cp1252") && fileEncoding.equals("iso-8859-1") || targetFile.equals("iso-8859-1") && fileEncoding.equals("cp1252")) { @@ -160,7 +172,7 @@ public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest { public boolean useWriter; - protected abstract boolean getUseBom(); + protected abstract BomConfig getUseBom(); @Test @@ -176,7 +188,7 @@ public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest { ctxt.setResponseCharacterEncoding(contextResponseEncoding); Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", DefaultServlet.class.getName()); defaultServlet.addInitParameter("fileEncoding", fileEncoding); - defaultServlet.addInitParameter("useBomIfPresent", Boolean.toString(getUseBom())); + defaultServlet.addInitParameter("useBomIfPresent", getUseBom().configurationValue); ctxt.addServletMappingDecoded("/", "default"); @@ -214,10 +226,10 @@ public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest { } String body = res.toString(); /* - * Remove BOM before checking content - * BOM (should be) removed by Tomcat when file is included + * Remove BOM before checking content if DefaultServlet is configured to + * remove BOM. */ - if (!useInclude && targetFile.endsWith("-bom")) { + if (!useInclude && targetFile.endsWith("-bom") && getUseBom().stripBom) { body = body.substring(1); } diff --git a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java similarity index 78% copy from test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java copy to test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java index 9deec1d..48006cd 100644 --- a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java +++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java @@ -16,11 +16,12 @@ */ package org.apache.catalina.servlets; +import org.apache.catalina.servlets.DefaultServlet.BomConfig; -public class TestDefaultServletEncodingWithoutBom extends DefaultServletEncodingBaseTest { +public class TestDefaultServletEncodingPassThroughBom extends DefaultServletEncodingBaseTest { @Override - protected boolean getUseBom() { - return false; + protected BomConfig getUseBom() { + return BomConfig.PASS_THROUGH; } } diff --git a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java index 46bd78f..dccbfd3 100644 --- a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java +++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java @@ -16,11 +16,12 @@ */ package org.apache.catalina.servlets; +import org.apache.catalina.servlets.DefaultServlet.BomConfig; public class TestDefaultServletEncodingWithBom extends DefaultServletEncodingBaseTest { @Override - protected boolean getUseBom() { - return true; + protected BomConfig getUseBom() { + return BomConfig.TRUE; } } diff --git a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java index 9deec1d..0d6bae0 100644 --- a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java +++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java @@ -16,11 +16,12 @@ */ package org.apache.catalina.servlets; +import org.apache.catalina.servlets.DefaultServlet.BomConfig; public class TestDefaultServletEncodingWithoutBom extends DefaultServletEncodingBaseTest { @Override - protected boolean getUseBom() { - return false; + protected BomConfig getUseBom() { + return BomConfig.FALSE; } } diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 173e8cd..aab44fb 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -65,6 +65,7 @@ <!ELEMENT bug (#PCDATA)> <!ELEMENT rev (#PCDATA)> + <!ELEMENT pr (#PCDATA)> <!-- Random HTML markup tags. Add more here as needed. --> <!ELEMENT a (#PCDATA)> @@ -110,6 +111,14 @@ <bug>65308</bug>: NPE in JNDIRealm when no <code>userRoleAttribute</code> is given. (fschumacher) </fix> + <add> + <pr>418</pr>: Add a new option, <code>pass-through</code>, to the + default servlet's <code>useBomIfPresent</code> initialization parameter + that causes the default servlet to leave any BOM in place when + processing a static file and not to use the BOM to determine the + encoding of the file. Based on a pull request by Jean-Louis Monteiro. + (markt) + </add> </changelog> </subsection> <subsection name="Coyote"> diff --git a/webapps/docs/default-servlet.xml b/webapps/docs/default-servlet.xml index 1136d06..d5e5cb5 100644 --- a/webapps/docs/default-servlet.xml +++ b/webapps/docs/default-servlet.xml @@ -180,7 +180,11 @@ Tomcat.</p> </property> <property name="useBomIfPresent"> If a static file contains a byte order mark (BOM), should this be used - to determine the file encoding in preference to fileEncoding. [true] + to determine the file encoding in preference to fileEncoding. This + setting must be one of <code>true</code> (remove the BOM and use it in + preference to fileEncoding), <code>false</code> (remove the BOM but do + not use it) or <code>pass-through</code> (do not use the BOM and do not + remove it). [true] </property> <property name="sendfileSize"> If the connector used supports sendfile, this represents the minimal diff --git a/webapps/docs/tomcat-docs.xsl b/webapps/docs/tomcat-docs.xsl index e7c6cf0..8aa9e9d 100644 --- a/webapps/docs/tomcat-docs.xsl +++ b/webapps/docs/tomcat-docs.xsl @@ -43,6 +43,7 @@ <xsl:param name="build-date-iso-8601" select="'yyyy-MM-dd'"/> <xsl:param name="year" select="'yyyy'"/> <xsl:param name="buglink" select="'https://bz.apache.org/bugzilla/show_bug.cgi?id='"/> + <xsl:param name="prlink" select="'https://github.com/apache/tomcat/pull/'"/> <xsl:param name="revlink" select="'https://svn.apache.org/viewvc?view=rev&rev='"/> <xsl:param name="doclink" select="'https://tomcat.apache.org/tomcat-10.0-doc'"/> <xsl:param name="sylink" select="'https://tomcat.apache.org/security-10.html'"/> @@ -388,6 +389,12 @@ <a href="{$link}"><xsl:apply-templates/></a> </xsl:template> + <!-- Link to a pull request --> + <xsl:template match="pr"> + <xsl:variable name="link"><xsl:value-of select="$prlink"/><xsl:value-of select="text()"/></xsl:variable> + <a href="{$link}">#<xsl:apply-templates/></a> + </xsl:template> + <!-- Link to a SVN revision report --> <xsl:template match="rev"> <xsl:variable name="link"><xsl:value-of select="$revlink"/><xsl:value-of select="text()"/></xsl:variable> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org