This is an automated email from the ASF dual-hosted git repository.
markt pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/9.0.x by this push:
new 61a27bd Fix #418 - add "pass-through" as an option for useBomIfPresent
61a27bd is described below
commit 61a27bd60a8c2e26a6338d21d82e47a8a206241c
Author: Mark Thomas <[email protected]>
AuthorDate: Wed May 19 12:17:17 2021 +0100
Fix #418 - add "pass-through" as an option for useBomIfPresent
Based on a PR by Jean-Louis Monteiro
Also adds support for the <pr> element in the change log
---
.../apache/catalina/servlets/DefaultServlet.java | 84 +++++++++++++++++-----
.../catalina/servlets/LocalStrings.properties | 1 +
.../servlets/DefaultServletEncodingBaseTest.java | 28 +++++---
... TestDefaultServletEncodingPassThroughBom.java} | 7 +-
.../TestDefaultServletEncodingWithBom.java | 5 +-
.../TestDefaultServletEncodingWithoutBom.java | 5 +-
webapps/docs/changelog.xml | 9 +++
webapps/docs/default-servlet.xml | 6 +-
webapps/docs/tomcat-docs.xsl | 7 ++
9 files changed, 117 insertions(+), 35 deletions(-)
diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java
b/java/org/apache/catalina/servlets/DefaultServlet.java
index 3c135f8..9618d63 100644
--- a/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -246,8 +246,9 @@ public class DefaultServlet extends HttpServlet {
/**
* If a file has a BOM, should that be used in preference to fileEncoding?
+ * Will default to {@link BomConfig#TRUE} in {@link #init()}.
*/
- private boolean useBomIfPresent = true;
+ private BomConfig useBomIfPresent = null;
/**
* Minimum size for sendfile usage in bytes.
@@ -335,8 +336,23 @@ public class DefaultServlet extends HttpServlet {
}
}
- if (getServletConfig().getInitParameter("useBomIfPresent") != null) {
- useBomIfPresent =
Boolean.parseBoolean(getServletConfig().getInitParameter("useBomIfPresent"));
+ String useBomIfPresent =
getServletConfig().getInitParameter("useBomIfPresent");
+ if (useBomIfPresent == null) {
+ // Use default
+ this.useBomIfPresent = BomConfig.TRUE;
+ } else {
+ for (BomConfig bomConfig : BomConfig.values()) {
+ if
(bomConfig.configurationValue.equalsIgnoreCase(useBomIfPresent)) {
+ this.useBomIfPresent = bomConfig;
+ break;
+ }
+ }
+ if (this.useBomIfPresent == null) {
+ // Unrecognised configuration value
+ IllegalArgumentException iae = new IllegalArgumentException(
+ sm.getString("defaultServlet.unknownBomConfig",
useBomIfPresent));
+ throw new ServletException(iae);
+ }
}
globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
@@ -1083,8 +1099,8 @@ public class DefaultServlet extends HttpServlet {
if (!renderResult.markSupported()) {
renderResult = new
BufferedInputStream(renderResult);
}
- Charset bomCharset = processBom(renderResult);
- if (bomCharset != null && useBomIfPresent) {
+ Charset bomCharset = processBom(renderResult,
useBomIfPresent.stripBom);
+ if (bomCharset != null &&
useBomIfPresent.useBomEncoding) {
inputEncoding = bomCharset.name();
}
}
@@ -1105,8 +1121,8 @@ public class DefaultServlet extends HttpServlet {
if (!source.markSupported()) {
source = new BufferedInputStream(source);
}
- Charset bomCharset = processBom(source);
- if (bomCharset != null && useBomIfPresent) {
+ Charset bomCharset = processBom(source,
useBomIfPresent.stripBom);
+ if (bomCharset != null &&
useBomIfPresent.useBomEncoding) {
inputEncoding = bomCharset.name();
}
// Following test also ensures included resources
@@ -1217,7 +1233,7 @@ public class DefaultServlet extends HttpServlet {
/*
* Code borrowed heavily from Jasper's EncodingDetector
*/
- private static Charset processBom(InputStream is) throws IOException {
+ private static Charset processBom(InputStream is, boolean stripBom) throws
IOException {
// Java supported character sets do not use BOMs longer than 4 bytes
byte[] bom = new byte[4];
is.mark(bom.length);
@@ -1226,7 +1242,7 @@ public class DefaultServlet extends HttpServlet {
// BOMs are at least 2 bytes
if (count < 2) {
- skip(is, 0);
+ skip(is, 0, stripBom);
return null;
}
@@ -1234,31 +1250,31 @@ public class DefaultServlet extends HttpServlet {
int b0 = bom[0] & 0xFF;
int b1 = bom[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
- skip(is, 2);
+ skip(is, 2, stripBom);
return StandardCharsets.UTF_16BE;
}
// Delay the UTF_16LE check if there are more that 2 bytes since it
// overlaps with UTF-32LE.
if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
- skip(is, 2);
+ skip(is, 2, stripBom);
return StandardCharsets.UTF_16LE;
}
// Remaining BOMs are at least 3 bytes
if (count < 3) {
- skip(is, 0);
+ skip(is, 0, stripBom);
return null;
}
// UTF-8 is only 3-byte BOM
int b2 = bom[2] & 0xFF;
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
- skip(is, 3);
+ skip(is, 3, stripBom);
return StandardCharsets.UTF_8;
}
if (count < 4) {
- skip(is, 0);
+ skip(is, 0, stripBom);
return null;
}
@@ -1275,19 +1291,21 @@ public class DefaultServlet extends HttpServlet {
// won't see a UTF16-LE file with a BOM where the first real data is
// 0x00 0x00
if (b0 == 0xFF && b1 == 0xFE) {
- skip(is, 2);
+ skip(is, 2, stripBom);
return StandardCharsets.UTF_16LE;
}
- skip(is, 0);
+ skip(is, 0, stripBom);
return null;
}
- private static void skip(InputStream is, int skip) throws IOException {
+ private static void skip(InputStream is, int skip, boolean stripBom)
throws IOException {
is.reset();
- while (skip-- > 0) {
- is.read();
+ if (stripBom) {
+ while (skip-- > 0) {
+ is.read();
+ }
}
}
@@ -3023,4 +3041,32 @@ public class DefaultServlet extends HttpServlet {
return c;
}
}
+
+ static enum BomConfig {
+ /**
+ * BoM is stripped if present and any BoM found used to determine the
+ * encoding used to read the resource.
+ */
+ TRUE("true", true, true),
+ /**
+ * BoM is stripped if present but the configured file encoding is used
+ * to read the resource.
+ */
+ FALSE("false", true, false),
+ /**
+ * BoM is not stripped and the configured file encoding is used to read
+ * the resource.
+ */
+ PASS_THROUGH("pass-through", false, false);
+
+ final String configurationValue;
+ final boolean stripBom;
+ final boolean useBomEncoding;
+
+ private BomConfig(String configurationValue, boolean stripBom, boolean
useBomEncoding) {
+ this.configurationValue = configurationValue;
+ this.stripBom = stripBom;
+ this.useBomEncoding = useBomEncoding;
+ }
+ }
}
diff --git a/java/org/apache/catalina/servlets/LocalStrings.properties
b/java/org/apache/catalina/servlets/LocalStrings.properties
index 226a3ae..1ce4990 100644
--- a/java/org/apache/catalina/servlets/LocalStrings.properties
+++ b/java/org/apache/catalina/servlets/LocalStrings.properties
@@ -42,6 +42,7 @@ defaultServlet.missingResource=The requested resource [{0}]
is not available
defaultServlet.noResources=No static resources were found
defaultServlet.readerCloseFailed=Failed to close reader
defaultServlet.skipfail=Read failed because only [{0}] bytes were available
but needed to skip [{1}] bytes to reach the start of the requested range
+defaultServlet.unknownBomConfig=Unrecognised value of [{0}] provided for
useBomIfPresent initialization parameter
defaultServlet.xslError=XSL transformer error
directory.filename=Filename
diff --git
a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
index 7a1b1c9..87c55dc 100644
--- a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
+++ b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
@@ -40,6 +40,7 @@ import org.junit.runners.Parameterized.Parameter;
import org.apache.catalina.Context;
import org.apache.catalina.Wrapper;
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
import org.apache.catalina.startup.Tomcat;
import org.apache.catalina.startup.TomcatBaseTest;
import org.apache.tomcat.util.buf.B2CConverter;
@@ -109,16 +110,27 @@ public abstract class DefaultServletEncodingBaseTest
extends TomcatBaseTest {
}
- private static boolean getExpected(String fileEncoding, boolean useBom,
String targetFile,
+ private static boolean getExpected(String fileEncoding, BomConfig
bomConfig, String targetFile,
String outputEncoding, boolean callSetCharacterEncoding, boolean
useWriter) {
- if (useWriter || callSetCharacterEncoding) {
+ if (targetFile.endsWith("-bom") && !bomConfig.stripBom) {
+ /*
+ * If the target file contains a BOM and the BOM is not stripped
the
+ * test should always fail because BOM will be kept untouched
+ * and may result in a different character in the output
+ *
+ * We could differentiate iso-8859-1 and cp1252 but we would need
logic here and in
+ * the asserts and that would make the logic in the test as
complex as the code
+ * under test.
+ */
+ return false;
+ } else if (useWriter || callSetCharacterEncoding) {
/*
* Using a writer or setting the output character encoding means
the
* response will specify a character set. These cases therefore
* reduce to can the file be read with the correct encoding.
* (Assuming any BOM is always skipped in the included output.)
*/
- if (targetFile.endsWith("-bom") && useBom ||
+ if (targetFile.endsWith("-bom") && bomConfig.useBomEncoding ||
targetFile.startsWith(fileEncoding) ||
targetFile.equals("cp1252") &&
fileEncoding.equals("iso-8859-1") ||
targetFile.equals("iso-8859-1") &&
fileEncoding.equals("cp1252")) {
@@ -160,7 +172,7 @@ public abstract class DefaultServletEncodingBaseTest
extends TomcatBaseTest {
public boolean useWriter;
- protected abstract boolean getUseBom();
+ protected abstract BomConfig getUseBom();
@Test
@@ -176,7 +188,7 @@ public abstract class DefaultServletEncodingBaseTest
extends TomcatBaseTest {
ctxt.setResponseCharacterEncoding(contextResponseEncoding);
Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
DefaultServlet.class.getName());
defaultServlet.addInitParameter("fileEncoding", fileEncoding);
- defaultServlet.addInitParameter("useBomIfPresent",
Boolean.toString(getUseBom()));
+ defaultServlet.addInitParameter("useBomIfPresent",
getUseBom().configurationValue);
ctxt.addServletMappingDecoded("/", "default");
@@ -214,10 +226,10 @@ public abstract class DefaultServletEncodingBaseTest
extends TomcatBaseTest {
}
String body = res.toString();
/*
- * Remove BOM before checking content
- * BOM (should be) removed by Tomcat when file is included
+ * Remove BOM before checking content if DefaultServlet is configured
to
+ * remove BOM.
*/
- if (!useInclude && targetFile.endsWith("-bom")) {
+ if (!useInclude && targetFile.endsWith("-bom") &&
getUseBom().stripBom) {
body = body.substring(1);
}
diff --git
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
similarity index 78%
copy from
test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
copy to
test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
index 9deec1d..48006cd 100644
---
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
+++
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
@@ -16,11 +16,12 @@
*/
package org.apache.catalina.servlets;
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
-public class TestDefaultServletEncodingWithoutBom extends
DefaultServletEncodingBaseTest {
+public class TestDefaultServletEncodingPassThroughBom extends
DefaultServletEncodingBaseTest {
@Override
- protected boolean getUseBom() {
- return false;
+ protected BomConfig getUseBom() {
+ return BomConfig.PASS_THROUGH;
}
}
diff --git
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
index 46bd78f..dccbfd3 100644
--- a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
+++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
@@ -16,11 +16,12 @@
*/
package org.apache.catalina.servlets;
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
public class TestDefaultServletEncodingWithBom extends
DefaultServletEncodingBaseTest {
@Override
- protected boolean getUseBom() {
- return true;
+ protected BomConfig getUseBom() {
+ return BomConfig.TRUE;
}
}
diff --git
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
index 9deec1d..0d6bae0 100644
---
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
+++
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
@@ -16,11 +16,12 @@
*/
package org.apache.catalina.servlets;
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
public class TestDefaultServletEncodingWithoutBom extends
DefaultServletEncodingBaseTest {
@Override
- protected boolean getUseBom() {
- return false;
+ protected BomConfig getUseBom() {
+ return BomConfig.FALSE;
}
}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 2246907..f0a0164 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -65,6 +65,7 @@
<!ELEMENT bug (#PCDATA)>
<!ELEMENT rev (#PCDATA)>
+ <!ELEMENT pr (#PCDATA)>
<!-- Random HTML markup tags. Add more here as needed. -->
<!ELEMENT a (#PCDATA)>
@@ -110,6 +111,14 @@
<bug>65308</bug>: NPE in JNDIRealm when no
<code>userRoleAttribute</code>
is given. (fschumacher)
</fix>
+ <add>
+ <pr>418</pr>: Add a new option, <code>pass-through</code>, to the
+ default servlet's <code>useBomIfPresent</code> initialization parameter
+ that causes the default servlet to leave any BOM in place when
+ processing a static file and not to use the BOM to determine the
+ encoding of the file. Based on a pull request by Jean-Louis Monteiro.
+ (markt)
+ </add>
</changelog>
</subsection>
<subsection name="Coyote">
diff --git a/webapps/docs/default-servlet.xml b/webapps/docs/default-servlet.xml
index 1136d06..d5e5cb5 100644
--- a/webapps/docs/default-servlet.xml
+++ b/webapps/docs/default-servlet.xml
@@ -180,7 +180,11 @@ Tomcat.</p>
</property>
<property name="useBomIfPresent">
If a static file contains a byte order mark (BOM), should this be used
- to determine the file encoding in preference to fileEncoding. [true]
+ to determine the file encoding in preference to fileEncoding. This
+ setting must be one of <code>true</code> (remove the BOM and use it in
+ preference to fileEncoding), <code>false</code> (remove the BOM but do
+ not use it) or <code>pass-through</code> (do not use the BOM and do not
+ remove it). [true]
</property>
<property name="sendfileSize">
If the connector used supports sendfile, this represents the minimal
diff --git a/webapps/docs/tomcat-docs.xsl b/webapps/docs/tomcat-docs.xsl
index a940896..29af97f 100644
--- a/webapps/docs/tomcat-docs.xsl
+++ b/webapps/docs/tomcat-docs.xsl
@@ -43,6 +43,7 @@
<xsl:param name="build-date-iso-8601" select="'yyyy-MM-dd'"/>
<xsl:param name="year" select="'yyyy'"/>
<xsl:param name="buglink"
select="'https://bz.apache.org/bugzilla/show_bug.cgi?id='"/>
+ <xsl:param name="prlink"
select="'https://github.com/apache/tomcat/pull/'"/>
<xsl:param name="revlink"
select="'https://svn.apache.org/viewvc?view=rev&rev='"/>
<xsl:param name="doclink"
select="'https://tomcat.apache.org/tomcat-9.0-doc'"/>
<xsl:param name="sylink"
select="'https://tomcat.apache.org/security-9.html'"/>
@@ -388,6 +389,12 @@
<a href="{$link}"><xsl:apply-templates/></a>
</xsl:template>
+ <!-- Link to a pull request -->
+ <xsl:template match="pr">
+ <xsl:variable name="link"><xsl:value-of select="$prlink"/><xsl:value-of
select="text()"/></xsl:variable>
+ <a href="{$link}">#<xsl:apply-templates/></a>
+ </xsl:template>
+
<!-- Link to a SVN revision report -->
<xsl:template match="rev">
<xsl:variable name="link"><xsl:value-of select="$revlink"/><xsl:value-of
select="text()"/></xsl:variable>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]