This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/master by this push:
     new ea00bb5  Fix #418 - add "pass-through" as an option for useBomIfPresent
ea00bb5 is described below

commit ea00bb55348faad97db3f5f70de0efad036fec17
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Wed May 19 12:17:17 2021 +0100

    Fix #418 - add "pass-through" as an option for useBomIfPresent
    
    Based on a PR by Jean-Louis Monteiro
    Also adds support for the <pr> element in the change log
---
 .../apache/catalina/servlets/DefaultServlet.java   | 84 +++++++++++++++++-----
 .../catalina/servlets/LocalStrings.properties      |  1 +
 .../servlets/DefaultServletEncodingBaseTest.java   | 28 +++++---
 ... TestDefaultServletEncodingPassThroughBom.java} |  7 +-
 .../TestDefaultServletEncodingWithBom.java         |  5 +-
 .../TestDefaultServletEncodingWithoutBom.java      |  5 +-
 webapps/docs/changelog.xml                         |  9 +++
 webapps/docs/default-servlet.xml                   |  6 +-
 webapps/docs/tomcat-docs.xsl                       |  7 ++
 9 files changed, 117 insertions(+), 35 deletions(-)

diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java 
b/java/org/apache/catalina/servlets/DefaultServlet.java
index 05ceabd..cff8f88 100644
--- a/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -246,8 +246,9 @@ public class DefaultServlet extends HttpServlet {
 
     /**
      * If a file has a BOM, should that be used in preference to fileEncoding?
+     * Will default to {@link BomConfig#TRUE} in {@link #init()}.
      */
-    private boolean useBomIfPresent = true;
+    private BomConfig useBomIfPresent = null;
 
     /**
      * Minimum size for sendfile usage in bytes.
@@ -335,8 +336,23 @@ public class DefaultServlet extends HttpServlet {
             }
         }
 
-        if (getServletConfig().getInitParameter("useBomIfPresent") != null) {
-            useBomIfPresent = 
Boolean.parseBoolean(getServletConfig().getInitParameter("useBomIfPresent"));
+        String useBomIfPresent = 
getServletConfig().getInitParameter("useBomIfPresent");
+        if (useBomIfPresent == null) {
+            // Use default
+            this.useBomIfPresent = BomConfig.TRUE;
+        } else {
+            for (BomConfig bomConfig : BomConfig.values()) {
+                if 
(bomConfig.configurationValue.equalsIgnoreCase(useBomIfPresent)) {
+                    this.useBomIfPresent = bomConfig;
+                    break;
+                }
+            }
+            if (this.useBomIfPresent == null) {
+                // Unrecognised configuration value
+                IllegalArgumentException iae = new IllegalArgumentException(
+                        sm.getString("defaultServlet.unknownBomConfig", 
useBomIfPresent));
+                throw new ServletException(iae);
+            }
         }
 
         globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
@@ -1083,8 +1099,8 @@ public class DefaultServlet extends HttpServlet {
                             if (!renderResult.markSupported()) {
                                 renderResult = new 
BufferedInputStream(renderResult);
                             }
-                            Charset bomCharset = processBom(renderResult);
-                            if (bomCharset != null && useBomIfPresent) {
+                            Charset bomCharset = processBom(renderResult, 
useBomIfPresent.stripBom);
+                            if (bomCharset != null && 
useBomIfPresent.useBomEncoding) {
                                 inputEncoding = bomCharset.name();
                             }
                         }
@@ -1105,8 +1121,8 @@ public class DefaultServlet extends HttpServlet {
                             if (!source.markSupported()) {
                                 source = new BufferedInputStream(source);
                             }
-                            Charset bomCharset = processBom(source);
-                            if (bomCharset != null && useBomIfPresent) {
+                            Charset bomCharset = processBom(source, 
useBomIfPresent.stripBom);
+                            if (bomCharset != null && 
useBomIfPresent.useBomEncoding) {
                                 inputEncoding = bomCharset.name();
                             }
                             // Following test also ensures included resources
@@ -1217,7 +1233,7 @@ public class DefaultServlet extends HttpServlet {
     /*
      * Code borrowed heavily from Jasper's EncodingDetector
      */
-    private static Charset processBom(InputStream is) throws IOException {
+    private static Charset processBom(InputStream is, boolean stripBom) throws 
IOException {
         // Java supported character sets do not use BOMs longer than 4 bytes
         byte[] bom = new byte[4];
         is.mark(bom.length);
@@ -1226,7 +1242,7 @@ public class DefaultServlet extends HttpServlet {
 
         // BOMs are at least 2 bytes
         if (count < 2) {
-            skip(is, 0);
+            skip(is, 0, stripBom);
             return null;
         }
 
@@ -1234,31 +1250,31 @@ public class DefaultServlet extends HttpServlet {
         int b0 = bom[0] & 0xFF;
         int b1 = bom[1] & 0xFF;
         if (b0 == 0xFE && b1 == 0xFF) {
-            skip(is, 2);
+            skip(is, 2, stripBom);
             return StandardCharsets.UTF_16BE;
         }
         // Delay the UTF_16LE check if there are more that 2 bytes since it
         // overlaps with UTF-32LE.
         if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
-            skip(is, 2);
+            skip(is, 2, stripBom);
             return StandardCharsets.UTF_16LE;
         }
 
         // Remaining BOMs are at least 3 bytes
         if (count < 3) {
-            skip(is, 0);
+            skip(is, 0, stripBom);
             return null;
         }
 
         // UTF-8 is only 3-byte BOM
         int b2 = bom[2] & 0xFF;
         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
-            skip(is, 3);
+            skip(is, 3, stripBom);
             return StandardCharsets.UTF_8;
         }
 
         if (count < 4) {
-            skip(is, 0);
+            skip(is, 0, stripBom);
             return null;
         }
 
@@ -1275,19 +1291,21 @@ public class DefaultServlet extends HttpServlet {
         // won't see a UTF16-LE file with a BOM where the first real data is
         // 0x00 0x00
         if (b0 == 0xFF && b1 == 0xFE) {
-            skip(is, 2);
+            skip(is, 2, stripBom);
             return StandardCharsets.UTF_16LE;
         }
 
-        skip(is, 0);
+        skip(is, 0, stripBom);
         return null;
     }
 
 
-    private static void skip(InputStream is, int skip) throws IOException {
+    private static void skip(InputStream is, int skip, boolean stripBom) 
throws IOException {
         is.reset();
-        while (skip-- > 0) {
-            is.read();
+        if (stripBom) {
+            while (skip-- > 0) {
+                is.read();
+            }
         }
     }
 
@@ -2941,4 +2959,32 @@ public class DefaultServlet extends HttpServlet {
                 return c;
         }
     }
+
+    static enum BomConfig {
+        /**
+         * BoM is stripped if present and any BoM found used to determine the
+         * encoding used to read the resource.
+         */
+        TRUE("true", true, true),
+        /**
+         * BoM is stripped if present but the configured file encoding is used
+         * to read the resource.
+         */
+        FALSE("false", true, false),
+        /**
+         * BoM is not stripped and the configured file encoding is used to read
+         * the resource.
+         */
+        PASS_THROUGH("pass-through", false, false);
+
+        final String configurationValue;
+        final boolean stripBom;
+        final boolean useBomEncoding;
+
+        private BomConfig(String configurationValue, boolean stripBom, boolean 
useBomEncoding) {
+            this.configurationValue = configurationValue;
+            this.stripBom = stripBom;
+            this.useBomEncoding = useBomEncoding;
+        }
+    }
 }
diff --git a/java/org/apache/catalina/servlets/LocalStrings.properties 
b/java/org/apache/catalina/servlets/LocalStrings.properties
index 226a3ae..1ce4990 100644
--- a/java/org/apache/catalina/servlets/LocalStrings.properties
+++ b/java/org/apache/catalina/servlets/LocalStrings.properties
@@ -42,6 +42,7 @@ defaultServlet.missingResource=The requested resource [{0}] 
is not available
 defaultServlet.noResources=No static resources were found
 defaultServlet.readerCloseFailed=Failed to close reader
 defaultServlet.skipfail=Read failed because only [{0}] bytes were available 
but needed to skip [{1}] bytes to reach the start of the requested range
+defaultServlet.unknownBomConfig=Unrecognised value of [{0}] provided for 
useBomIfPresent initialization parameter
 defaultServlet.xslError=XSL transformer error
 
 directory.filename=Filename
diff --git 
a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java 
b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
index caeb286..2143fac 100644
--- a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
+++ b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
@@ -40,6 +40,7 @@ import org.junit.runners.Parameterized.Parameter;
 
 import org.apache.catalina.Context;
 import org.apache.catalina.Wrapper;
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
 import org.apache.catalina.startup.Tomcat;
 import org.apache.catalina.startup.TomcatBaseTest;
 import org.apache.tomcat.util.buf.B2CConverter;
@@ -109,16 +110,27 @@ public abstract class DefaultServletEncodingBaseTest 
extends TomcatBaseTest {
     }
 
 
-    private static boolean getExpected(String fileEncoding, boolean useBom, 
String targetFile,
+    private static boolean getExpected(String fileEncoding, BomConfig 
bomConfig, String targetFile,
             String outputEncoding, boolean callSetCharacterEncoding, boolean 
useWriter) {
-        if (useWriter || callSetCharacterEncoding) {
+        if (targetFile.endsWith("-bom") && !bomConfig.stripBom) {
+            /*
+             * If the target file contains a BOM and the BOM is not stripped 
the
+             * test should always fail because BOM will be kept untouched
+             * and may result in a different character in the output
+             *
+             * We could differentiate iso-8859-1 and cp1252 but we would need 
logic here and in
+             * the asserts and that would make the logic in the test as 
complex as the code
+             * under test.
+             */
+            return false;
+        } else if (useWriter || callSetCharacterEncoding) {
             /*
              * Using a writer or setting the output character encoding means 
the
              * response will specify a character set. These cases therefore
              * reduce to can the file be read with the correct encoding.
              * (Assuming any BOM is always skipped in the included output.)
              */
-            if (targetFile.endsWith("-bom") && useBom ||
+            if (targetFile.endsWith("-bom") && bomConfig.useBomEncoding ||
                     targetFile.startsWith(fileEncoding) ||
                     targetFile.equals("cp1252") && 
fileEncoding.equals("iso-8859-1") ||
                     targetFile.equals("iso-8859-1") && 
fileEncoding.equals("cp1252")) {
@@ -160,7 +172,7 @@ public abstract class DefaultServletEncodingBaseTest 
extends TomcatBaseTest {
     public boolean useWriter;
 
 
-    protected abstract boolean getUseBom();
+    protected abstract BomConfig getUseBom();
 
 
     @Test
@@ -176,7 +188,7 @@ public abstract class DefaultServletEncodingBaseTest 
extends TomcatBaseTest {
         ctxt.setResponseCharacterEncoding(contextResponseEncoding);
         Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", 
DefaultServlet.class.getName());
         defaultServlet.addInitParameter("fileEncoding", fileEncoding);
-        defaultServlet.addInitParameter("useBomIfPresent", 
Boolean.toString(getUseBom()));
+        defaultServlet.addInitParameter("useBomIfPresent", 
getUseBom().configurationValue);
 
         ctxt.addServletMappingDecoded("/", "default");
 
@@ -214,10 +226,10 @@ public abstract class DefaultServletEncodingBaseTest 
extends TomcatBaseTest {
         }
         String body = res.toString();
         /*
-         * Remove BOM before checking content
-         * BOM (should be) removed by Tomcat when file is included
+         * Remove BOM before checking content if DefaultServlet is configured 
to
+         * remove BOM.
          */
-        if (!useInclude && targetFile.endsWith("-bom")) {
+        if (!useInclude && targetFile.endsWith("-bom") && 
getUseBom().stripBom) {
             body = body.substring(1);
         }
 
diff --git 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
similarity index 78%
copy from 
test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
copy to 
test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
index 9deec1d..48006cd 100644
--- 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
+++ 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingPassThroughBom.java
@@ -16,11 +16,12 @@
  */
 package org.apache.catalina.servlets;
 
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
 
-public class TestDefaultServletEncodingWithoutBom extends 
DefaultServletEncodingBaseTest {
+public class TestDefaultServletEncodingPassThroughBom extends 
DefaultServletEncodingBaseTest {
 
     @Override
-    protected boolean getUseBom() {
-        return false;
+    protected BomConfig getUseBom() {
+        return BomConfig.PASS_THROUGH;
     }
 }
diff --git 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
index 46bd78f..dccbfd3 100644
--- a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
+++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
@@ -16,11 +16,12 @@
  */
 package org.apache.catalina.servlets;
 
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
 
 public class TestDefaultServletEncodingWithBom extends 
DefaultServletEncodingBaseTest {
 
     @Override
-    protected boolean getUseBom() {
-        return true;
+    protected BomConfig getUseBom() {
+        return BomConfig.TRUE;
     }
 }
diff --git 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
index 9deec1d..0d6bae0 100644
--- 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
+++ 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
@@ -16,11 +16,12 @@
  */
 package org.apache.catalina.servlets;
 
+import org.apache.catalina.servlets.DefaultServlet.BomConfig;
 
 public class TestDefaultServletEncodingWithoutBom extends 
DefaultServletEncodingBaseTest {
 
     @Override
-    protected boolean getUseBom() {
-        return false;
+    protected BomConfig getUseBom() {
+        return BomConfig.FALSE;
     }
 }
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 173e8cd..aab44fb 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -65,6 +65,7 @@
 
   <!ELEMENT bug (#PCDATA)>
   <!ELEMENT rev (#PCDATA)>
+  <!ELEMENT pr (#PCDATA)>
 
   <!-- Random HTML markup tags. Add more here as needed. -->
   <!ELEMENT a (#PCDATA)>
@@ -110,6 +111,14 @@
         <bug>65308</bug>: NPE in JNDIRealm when no 
<code>userRoleAttribute</code>
         is given. (fschumacher)
       </fix>
+      <add>
+        <pr>418</pr>: Add a new option, <code>pass-through</code>, to the
+        default servlet's <code>useBomIfPresent</code> initialization parameter
+        that causes the default servlet to leave any BOM in place when
+        processing a static file and not to use the BOM to determine the
+        encoding of the file. Based on a pull request by Jean-Louis Monteiro.
+        (markt)
+      </add>
     </changelog>
   </subsection>
   <subsection name="Coyote">
diff --git a/webapps/docs/default-servlet.xml b/webapps/docs/default-servlet.xml
index 1136d06..d5e5cb5 100644
--- a/webapps/docs/default-servlet.xml
+++ b/webapps/docs/default-servlet.xml
@@ -180,7 +180,11 @@ Tomcat.</p>
   </property>
   <property name="useBomIfPresent">
         If a static file contains a byte order mark (BOM), should this be used
-        to determine the file encoding in preference to fileEncoding. [true]
+        to determine the file encoding in preference to fileEncoding. This
+        setting must be one of <code>true</code> (remove the BOM and use it in
+        preference to fileEncoding), <code>false</code> (remove the BOM but do
+        not use it) or <code>pass-through</code> (do not use the BOM and do not
+        remove it). [true]
   </property>
   <property name="sendfileSize">
         If the connector used supports sendfile, this represents the minimal
diff --git a/webapps/docs/tomcat-docs.xsl b/webapps/docs/tomcat-docs.xsl
index e7c6cf0..8aa9e9d 100644
--- a/webapps/docs/tomcat-docs.xsl
+++ b/webapps/docs/tomcat-docs.xsl
@@ -43,6 +43,7 @@
   <xsl:param    name="build-date-iso-8601" select="'yyyy-MM-dd'"/>
   <xsl:param    name="year"                select="'yyyy'"/>
   <xsl:param    name="buglink"             
select="'https://bz.apache.org/bugzilla/show_bug.cgi?id='"/>
+  <xsl:param    name="prlink"              
select="'https://github.com/apache/tomcat/pull/'"/>
   <xsl:param    name="revlink"             
select="'https://svn.apache.org/viewvc?view=rev&amp;rev='"/>
   <xsl:param    name="doclink"             
select="'https://tomcat.apache.org/tomcat-10.0-doc'"/>
   <xsl:param    name="sylink"              
select="'https://tomcat.apache.org/security-10.html'"/>
@@ -388,6 +389,12 @@
     <a href="{$link}"><xsl:apply-templates/></a>
   </xsl:template>
 
+  <!-- Link to a pull request -->
+  <xsl:template match="pr">
+    <xsl:variable name="link"><xsl:value-of select="$prlink"/><xsl:value-of 
select="text()"/></xsl:variable>
+    <a href="{$link}">#<xsl:apply-templates/></a>
+  </xsl:template>
+
   <!-- Link to a SVN revision report -->
   <xsl:template match="rev">
     <xsl:variable name="link"><xsl:value-of select="$revlink"/><xsl:value-of 
select="text()"/></xsl:variable>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to