This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 8.5.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git

commit 486e49e7c9e36c3adb93f1ec684c7d3d4a59fe69
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Mon Jun 24 13:38:12 2019 +0100

    Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=49464
    
    Improve the Default Servlet's handling of static files when the file
    encoding is not compatible with the required response encoding.
---
 conf/web.xml                                       |   5 +
 java/org/apache/catalina/core/StandardContext.java |  12 +-
 .../apache/catalina/servlets/DefaultServlet.java   | 237 ++++++++++++++---
 .../servlets/DefaultServletEncodingBaseTest.java   | 284 +++++++++++++++++++++
 .../catalina/servlets/TestDefaultServlet.java      |  48 ++--
 .../TestDefaultServletEncodingWithBom.java         |  26 ++
 .../TestDefaultServletEncodingWithoutBom.java      |  26 ++
 test/webapp/404.html                               |   1 +
 test/webapp/bug49nnn/bug49464-cp1252.txt           |   1 +
 test/webapp/bug49nnn/bug49464-ibm850.txt           |   1 +
 test/webapp/bug49nnn/bug49464-iso-8859-1.txt       |   1 +
 test/webapp/bug49nnn/bug49464-utf-8-bom.txt        |   1 +
 test/webapp/bug49nnn/bug49464-utf-8.txt            |   1 +
 webapps/docs/changelog.xml                         |   5 +
 14 files changed, 585 insertions(+), 64 deletions(-)

diff --git a/conf/web.xml b/conf/web.xml
index 0a34838..4d207ee 100644
--- a/conf/web.xml
+++ b/conf/web.xml
@@ -48,6 +48,11 @@
   <!--   fileEncoding        Encoding to be used to read static resources   -->
   <!--                       [platform default]                             -->
   <!--                                                                      -->
+  <!--   useBomIfPresent     If a static file contains a byte order mark    -->
+  <!--                       (BOM), should this be used to determine the    -->
+  <!--                       file encoding in preference to fileEncoding.   -->
+  <!--                       [true]                                         -->
+  <!--                                                                      -->
   <!--   input               Input buffer size (in bytes) when reading      -->
   <!--                       resources to be served.  [2048]                -->
   <!--                                                                      -->
diff --git a/java/org/apache/catalina/core/StandardContext.java b/java/org/apache/catalina/core/StandardContext.java
index e425076..c7803f2 100644
--- a/java/org/apache/catalina/core/StandardContext.java
+++ b/java/org/apache/catalina/core/StandardContext.java
@@ -882,7 +882,17 @@ public class StandardContext extends ContainerBase
 
     @Override
     public void setResponseCharacterEncoding(String responseEncoding) {
-        this.responseEncoding = responseEncoding;
+        /*
+         * This ensures that the context response encoding is represented by a
+         * unique String object. This enables the Default Servlet to
+         * differentiate between a Response using this default encoding and one
+         * that has been explicitly configured.
+         */
+        if (responseEncoding == null) {
+            this.responseEncoding = null;
+        } else {
+            this.responseEncoding = new String(responseEncoding);
+        }
     }
 
 
diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java b/java/org/apache/catalina/servlets/DefaultServlet.java
index dd0bfbc..c99afe1 100644
--- a/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -32,6 +32,8 @@ import java.io.Reader;
 import java.io.Serializable;
 import java.io.StringReader;
 import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.security.AccessController;
 import java.util.ArrayList;
@@ -76,6 +78,7 @@ import org.apache.catalina.util.IOTools;
 import org.apache.catalina.util.ServerInfo;
 import org.apache.catalina.util.URLEncoder;
 import org.apache.catalina.webresources.CachedResource;
+import org.apache.tomcat.util.buf.B2CConverter;
 import org.apache.tomcat.util.http.ResponseUtil;
 import org.apache.tomcat.util.res.StringManager;
 import org.apache.tomcat.util.security.Escape;
@@ -242,6 +245,12 @@ public class DefaultServlet extends HttpServlet {
      * the platform default is used.
      */
     protected String fileEncoding = null;
+    private transient Charset fileEncodingCharset = null;
+
+    /**
+     * If a file has a BOM, should that be used in preference to fileEncoding?
+     */
+    private boolean useBomIfPresent = true;
 
     /**
      * Minimum size for sendfile usage in bytes.
@@ -308,6 +317,20 @@ public class DefaultServlet extends HttpServlet {
                 
Integer.parseInt(getServletConfig().getInitParameter("sendfileSize")) * 1024;
 
         fileEncoding = getServletConfig().getInitParameter("fileEncoding");
+        if (fileEncoding == null) {
+            fileEncodingCharset = Charset.defaultCharset();
+            fileEncoding = fileEncodingCharset.name();
+        } else {
+            try {
+                fileEncodingCharset = B2CConverter.getCharset(fileEncoding);
+            } catch (UnsupportedEncodingException e) {
+                throw new ServletException(e);
+            }
+        }
+
+        if (getServletConfig().getInitParameter("useBomIfPresent") != null)
+            useBomIfPresent = Boolean.parseBoolean(
+                    getServletConfig().getInitParameter("useBomIfPresent"));
 
         globalXsltFile = getServletConfig().getInitParameter("globalXsltFile");
         contextXsltFile = 
getServletConfig().getInitParameter("contextXsltFile");
@@ -759,11 +782,11 @@ public class DefaultServlet extends HttpServlet {
     /**
      * Serve the specified resource, optionally including the data content.
      *
-     * @param request  The servlet request we are processing
-     * @param response The servlet response we are creating
-     * @param content  Should the content be included?
-     * @param encoding The encoding to use if it is necessary to access the
-     *                 source as characters rather than as bytes
+     * @param request       The servlet request we are processing
+     * @param response      The servlet response we are creating
+     * @param content       Should the content be included?
+     * @param inputEncoding The encoding to use if it is necessary to access 
the
+     *                      source as characters rather than as bytes
      *
      * @exception IOException if an input/output error occurs
      * @exception ServletException if a servlet-specified error occurs
@@ -771,7 +794,7 @@ public class DefaultServlet extends HttpServlet {
     protected void serveResource(HttpServletRequest request,
                                  HttpServletResponse response,
                                  boolean content,
-                                 String encoding)
+                                 String inputEncoding)
         throws IOException, ServletException {
 
         boolean serveContent = content;
@@ -945,12 +968,7 @@ public class DefaultServlet extends HttpServlet {
             } catch (IllegalStateException e) {
                 // If it fails, we try to get a Writer instead if we're
                 // trying to serve a text file
-                if (!usingPrecompressedVersion &&
-                        ((contentType == null) ||
-                                (contentType.startsWith("text")) ||
-                                (contentType.endsWith("xml")) ||
-                                (contentType.contains("/javascript")))
-                        ) {
+                if (!usingPrecompressedVersion && isText(contentType)) {
                     writer = response.getWriter();
                     // Cannot reliably serve partial content with a Writer
                     ranges = FULL;
@@ -975,6 +993,32 @@ public class DefaultServlet extends HttpServlet {
             ranges = FULL;
         }
 
+        String outputEncoding = response.getCharacterEncoding();
+        Charset charset = B2CConverter.getCharset(outputEncoding);
+        boolean conversionRequired;
+        /*
+         * The test below deliberately uses != to compare two Strings. This is
+         * because the code is looking to see if the default character encoding
+         * has been returned because no explicit character encoding has been
+         * defined. There is no clean way of doing this via the Servlet API. It
+         * would be possible to add a Tomcat specific API but that would require
+         * quite a bit of code to get to the Tomcat specific request object that
+         * may have been wrapped. The != test is a (slightly hacky) quick way of
+         * doing this.
+         */
+        boolean outputEncodingSpecified =
+                outputEncoding != 
org.apache.coyote.Constants.DEFAULT_BODY_CHARSET.name() &&
+                outputEncoding != 
resources.getContext().getResponseCharacterEncoding();
+        if (!usingPrecompressedVersion && isText(contentType) && 
outputEncodingSpecified &&
+                !charset.equals(fileEncodingCharset)) {
+            conversionRequired = true;
+            // Conversion often results in fewer/more/different bytes.
+            // That does not play nicely with range requests.
+            ranges = FULL;
+        } else {
+            conversionRequired = false;
+        }
+
         if (resource.isDirectory() ||
                 isError ||
                 ( (ranges == null || ranges.isEmpty())
@@ -997,8 +1041,8 @@ public class DefaultServlet extends HttpServlet {
                     log("DefaultServlet.serveFile:  contentLength=" +
                         contentLength);
                 // Don't set a content length if something else has already
-                // written to the response.
-                if (contentWritten == 0) {
+                // written to the response or if conversion will be taking 
place
+                if (contentWritten == 0 && !conversionRequired) {
                     response.setContentLengthLong(contentLength);
                 }
             }
@@ -1014,34 +1058,72 @@ public class DefaultServlet extends HttpServlet {
                     // Output via a writer so can't use sendfile or write
                     // content directly.
                     if (resource.isDirectory()) {
-                        renderResult = render(request, getPathPrefix(request), 
resource, encoding);
+                        renderResult = render(request, getPathPrefix(request), 
resource, inputEncoding);
                     } else {
                         renderResult = resource.getInputStream();
+                        if (included) {
+                            // Need to make sure any BOM is removed
+                            if (!renderResult.markSupported()) {
+                                renderResult = new 
BufferedInputStream(renderResult);
+                            }
+                            Charset bomCharset = processBom(renderResult);
+                            if (bomCharset != null && useBomIfPresent) {
+                                inputEncoding = bomCharset.name();
+                            }
+                        }
                     }
-                    copy(renderResult, writer, encoding);
+                    copy(renderResult, writer, inputEncoding);
                 } else {
-                    // Output is via an InputStream
+                    // Output is via an OutputStream
                     if (resource.isDirectory()) {
-                        renderResult = render(request, getPathPrefix(request), 
resource, encoding);
+                        renderResult = render(request, getPathPrefix(request), 
resource, inputEncoding);
                     } else {
-                        if (!checkSendfile(request, response, resource, 
contentLength, null)) {
-                            // sendfile not possible so check if resource
-                            // content is available directly via
-                            // CachedResource. Do not want to call
-                            // getContent() on other resource
-                            // implementations as that could trigger loading
-                            // the contents of a very large file into memory
-                            byte[] resourceBody = null;
-                            if (resource instanceof CachedResource) {
-                                resourceBody = resource.getContent();
+                        // Output is content of resource
+                        // Check to see if conversion is required
+                        if (conversionRequired || included) {
+                            // When including a file, we need to check for a 
BOM
+                            // to determine if a conversion is required, so we
+                            // might as well always convert
+                            InputStream source = resource.getInputStream();
+                            if (!source.markSupported()) {
+                                source = new BufferedInputStream(source);
+                            }
+                            Charset bomCharset = processBom(source);
+                            if (bomCharset != null && useBomIfPresent) {
+                                inputEncoding = bomCharset.name();
                             }
-                            if (resourceBody == null) {
-                                // Resource content not directly available,
-                                // use InputStream
-                                renderResult = resource.getInputStream();
+                            // Following test also ensures included resources
+                            // are converted if an explicit output encoding was
+                            // specified
+                            if (outputEncodingSpecified) {
+                                OutputStreamWriter osw = new 
OutputStreamWriter(ostream, charset);
+                                PrintWriter pw = new PrintWriter(osw);
+                                copy(source, pw, inputEncoding);
+                                pw.flush();
                             } else {
-                                // Use the resource content directly
-                                ostream.write(resourceBody);
+                                // Just included but no conversion
+                                renderResult = source;
+                            }
+                        } else {
+                            if (!checkSendfile(request, response, resource, 
contentLength, null)) {
+                                // sendfile not possible so check if resource
+                                // content is available directly via
+                                // CachedResource. Do not want to call
+                                // getContent() on other resource
+                                // implementations as that could trigger 
loading
+                                // the contents of a very large file into 
memory
+                                byte[] resourceBody = null;
+                                if (resource instanceof CachedResource) {
+                                    resourceBody = resource.getContent();
+                                }
+                                if (resourceBody == null) {
+                                    // Resource content not directly available,
+                                    // use InputStream
+                                    renderResult = resource.getInputStream();
+                                } else {
+                                    // Use the resource content directly
+                                    ostream.write(resourceBody);
+                                }
                             }
                         }
                     }
@@ -1114,6 +1196,91 @@ public class DefaultServlet extends HttpServlet {
         }
     }
 
+
+    /*
+     * Code borrowed heavily from Jasper's EncodingDetector
+     */
+    private static Charset processBom(InputStream is) throws IOException {
+        // Java supported character sets do not use BOMs longer than 4 bytes
+        byte[] bom = new byte[4];
+        is.mark(bom.length);
+
+        int count = is.read(bom);
+
+        // BOMs are at least 2 bytes
+        if (count < 2) {
+            skip(is, 0);
+            return null;
+        }
+
+        // Look for two byte BOMs
+        int b0 = bom[0] & 0xFF;
+        int b1 = bom[1] & 0xFF;
+        if (b0 == 0xFE && b1 == 0xFF) {
+            skip(is, 2);
+            return StandardCharsets.UTF_16BE;
+        }
+        // Delay the UTF_16LE check if there are more than 2 bytes since it
+        // overlaps with UTF-32LE.
+        if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
+            skip(is, 2);
+            return StandardCharsets.UTF_16LE;
+        }
+
+        // Remaining BOMs are at least 3 bytes
+        if (count < 3) {
+            skip(is, 0);
+            return null;
+        }
+
+        // UTF-8 is the only 3-byte BOM
+        int b2 = bom[2] & 0xFF;
+        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+            skip(is, 3);
+            return StandardCharsets.UTF_8;
+        }
+
+        if (count < 4) {
+            skip(is, 0);
+            return null;
+        }
+
+        // Look for 4-byte BOMs
+        int b3 = bom[3] & 0xFF;
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
+            return Charset.forName("UTF-32BE");
+        }
+        if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) {
+            return Charset.forName("UTF-32LE");
+        }
+
+        // Now we can check for UTF-16LE. There is an assumption here that we
+        // won't see a UTF-16LE file with a BOM where the first real data is
+        // 0x00 0x00
+        if (b0 == 0xFF && b1 == 0xFE) {
+            skip(is, 2);
+            return StandardCharsets.UTF_16LE;
+        }
+
+        skip(is, 0);
+        return null;
+    }
+
+
+    private static void skip(InputStream is, int skip) throws IOException {
+        is.reset();
+        while (skip-- > 0) {
+            is.read();
+        }
+    }
+
+
+    private static boolean isText(String contentType) {
+        return  contentType == null || contentType.startsWith("text") ||
+                contentType.endsWith("xml") || 
contentType.contains("/javascript");
+    }
+
+
     private boolean pathEndsWithCompressedExtension(String path) {
         for (CompressionFormat format : compressionFormats) {
             if (path.endsWith(format.extension)) {
@@ -1434,7 +1601,7 @@ public class DefaultServlet extends HttpServlet {
      * Decide which way to render. HTML or XML.
      *
      * @param contextPath The path
-     * @param resource The resource
+     * @param resource    The resource
      * @param encoding    The encoding to use to process the readme (if any)
      *
      * @return the input stream with the rendered output
diff --git a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
new file mode 100644
index 0000000..6ff4849
--- /dev/null
+++ b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.servlets;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+
+import org.apache.catalina.Context;
+import org.apache.catalina.Wrapper;
+import org.apache.catalina.startup.Tomcat;
+import org.apache.catalina.startup.TomcatBaseTest;
+import org.apache.tomcat.util.buf.B2CConverter;
+import org.apache.tomcat.util.buf.ByteChunk;
+import org.apache.tomcat.util.http.parser.MediaType;
+
+/*
+ * Note: This test is split using two base classes. This is because, as a 
single
+ *       test, it takes so long to run it dominates the time taken to run the
+ *       tests when running tests using multiple threads. For example, on a
+ *       system with 12 cores, the tests take ~5 minutes per connector with 
this
+ *       test as a single test and ~3.5 minutes per connector with this test
+ *       split in two.
+ */
+@RunWith(Parameterized.class)
+public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest {
+
+    @Parameterized.Parameters(name = "{index}: contextEnc[{0}], fileEnc[{1}], 
target[{2}]," +
+            " useInclude[{3}], outputEnc[{4}], callSetCharacterEnc[{5}], 
useWriter[{6}]")
+    public static Collection<Object[]> parameters() {
+
+        String[] encodings = new String[] {
+                "utf-8", "ibm850", "cp1252", "iso-8859-1" };
+
+        String[] targetFiles = new String[] {
+                "cp1252", "ibm850", "iso-8859-1", "utf-8-bom", "utf-8" };
+
+        Boolean[] booleans = new Boolean[] { Boolean.FALSE, Boolean.TRUE };
+
+        List<Object[]> parameterSets = new ArrayList<>();
+
+        for (String contextResponseEncoding : encodings) {
+            for (String fileEncoding : encodings) {
+                for (String targetFile : targetFiles) {
+                    for (Boolean useInclude : booleans) {
+                        if (useInclude.booleanValue()) {
+                            for (String outputEncoding : encodings) {
+                                for (Boolean callSetCharacterEncoding : 
booleans) {
+                                    for (Boolean useWriter : booleans) {
+                                        parameterSets.add(new Object[] { 
contextResponseEncoding,
+                                                fileEncoding, targetFile,
+                                                useInclude, outputEncoding,
+                                                callSetCharacterEncoding, 
useWriter });
+                                    }
+                                }
+                            }
+                        } else {
+                            /*
+                             * Not using include so ignore outputEncoding,
+                             * callSetCharacterEncoding and useWriter
+                             *
+                             * Tests that do not use include are always 
expected to
+                             * pass.
+                             */
+                            String encoding = targetFile;
+                            if (encoding.endsWith("-bom")) {
+                                encoding = encoding.substring(0, 
encoding.length() - 4);
+                            }
+                            parameterSets.add(new Object[] { 
contextResponseEncoding, fileEncoding,
+                                    targetFile, useInclude, encoding, 
Boolean.FALSE,
+                                    Boolean.FALSE });
+                        }
+                    }
+                }
+            }
+        }
+
+        return parameterSets;
+    }
+
+
+    private static boolean getExpected(String fileEncoding, boolean useBom, 
String targetFile,
+            String outputEncoding, boolean callSetCharacterEncoding, boolean 
useWriter) {
+        if (useWriter || callSetCharacterEncoding) {
+            /*
+             * Using a writer or setting the output character encoding means 
the
+             * response will specify a character set. These cases therefore
+             * reduce to: can the file be read with the correct encoding?
+             * (Assuming any BOM is always skipped in the included output.)
+             */
+            if (targetFile.endsWith("-bom") && useBom ||
+                    targetFile.startsWith(fileEncoding) ||
+                    targetFile.equals("cp1252") && 
fileEncoding.equals("iso-8859-1") ||
+                    targetFile.equals("iso-8859-1") && 
fileEncoding.equals("cp1252")) {
+                return true;
+            } else {
+                return false;
+            }
+        } else if (!(targetFile.startsWith(outputEncoding) ||
+                targetFile.equals("cp1252") && 
outputEncoding.equals("iso-8859-1") ||
+                targetFile.equals("iso-8859-1") && 
outputEncoding.equals("cp1252"))) {
+            /*
+             * The non-writer use cases read the target file as bytes. These
+             * cases therefore reduce to can the bytes from the target file be
+             * included in the output without corruption? The character used in
+             * the tests has been chosen so that, apart from iso-8859-1 and
+             * cp1252, the bytes vary by character set.
+             * (Assuming any BOM is always skipped in the included output.)
+             */
+            return false;
+        } else {
+            return true;
+        }
+    }
+
+
+    @Parameter(0)
+    public String contextResponseEncoding;
+    @Parameter(1)
+    public String fileEncoding;
+    @Parameter(2)
+    public String targetFile;
+    @Parameter(3)
+    public boolean useInclude;
+    @Parameter(4)
+    public String outputEncoding;
+    @Parameter(5)
+    public boolean callSetCharacterEncoding;
+    @Parameter(6)
+    public boolean useWriter;
+
+
+    protected abstract boolean getUseBom();
+
+
+    @Test
+    public void testEncoding() throws Exception {
+
+        boolean expectedPass = getExpected(fileEncoding, getUseBom(), 
targetFile, outputEncoding,
+                callSetCharacterEncoding, useWriter);
+
+        Tomcat tomcat = getTomcatInstance();
+
+        File appDir = new File("test/webapp");
+        Context ctxt = tomcat.addContext("", appDir.getAbsolutePath());
+        ctxt.setResponseCharacterEncoding(contextResponseEncoding);
+        Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", 
DefaultServlet.class.getName());
+        defaultServlet.addInitParameter("fileEncoding", fileEncoding);
+        defaultServlet.addInitParameter("useBomIfPresent", 
Boolean.toString(getUseBom()));
+
+        ctxt.addServletMappingDecoded("/", "default");
+
+        if (useInclude) {
+            Tomcat.addServlet(ctxt, "include", new EncodingServlet(
+                    outputEncoding, callSetCharacterEncoding, targetFile, 
useWriter));
+            ctxt.addServletMappingDecoded("/include", "include");
+        }
+
+        tomcat.start();
+
+        final ByteChunk res = new ByteChunk();
+        Map<String,List<String>> headers = new HashMap<>();
+
+        String target;
+        if (useInclude) {
+            target = "http://localhost:" + getPort() + "/include";
+        } else {
+            target = "http://localhost:" + getPort() + "/bug49nnn/bug49464-" + 
targetFile + ".txt";
+        }
+        int rc = getUrl(target, res, headers);
+
+        Assert.assertEquals(HttpServletResponse.SC_OK, rc);
+        List<String> values = headers.get("Content-Type");
+        if (values != null && values.size() == 1) {
+            MediaType mediaType = MediaType.parseMediaType(new 
StringReader(values.get(0)));
+            String charset = mediaType.getCharset();
+            if (charset == null) {
+                res.setCharset(B2CConverter.getCharset(outputEncoding));
+            } else {
+                res.setCharset(B2CConverter.getCharset(charset));
+            }
+        } else {
+            res.setCharset(B2CConverter.getCharset(outputEncoding));
+        }
+        String body = res.toString();
+        /*
+         * Remove BOM before checking content
+         * BOM (should be) removed by Tomcat when file is included
+         */
+        if (!useInclude && targetFile.endsWith("-bom")) {
+            body = body.substring(1);
+        }
+
+        if (expectedPass) {
+            if (useInclude) {
+                Assert.assertEquals("\u00bd-\u00bd-\u00bd", body);
+            } else {
+                Assert.assertEquals("\u00bd", body);
+            }
+        } else {
+            if (useInclude) {
+                Assert.assertNotEquals("\u00bd-\u00bd-\u00bd", body);
+            } else {
+                Assert.assertNotEquals("\u00bd", body);
+            }
+        }
+    }
+
+
+    private static class EncodingServlet extends HttpServlet {
+
+        private static final long serialVersionUID = 1L;
+
+        private final String outputEncoding;
+        private final boolean callSetCharacterEncoding;
+        private final String includeTarget;
+        private final boolean useWriter;
+
+        public EncodingServlet(String outputEncoding, boolean 
callSetCharacterEncoding,
+                String includeTarget, boolean useWriter) {
+            this.outputEncoding = outputEncoding;
+            this.callSetCharacterEncoding = callSetCharacterEncoding;
+            this.includeTarget = includeTarget;
+            this.useWriter = useWriter;
+        }
+
+        @Override
+        protected void doGet(HttpServletRequest req, HttpServletResponse resp)
+                throws ServletException, IOException {
+            resp.setContentType("text/plain");
+            if (callSetCharacterEncoding) {
+                resp.setCharacterEncoding(outputEncoding);
+            }
+            if (useWriter) {
+                PrintWriter pw = resp.getWriter();
+                pw.print("\u00bd-");
+            } else {
+                
resp.getOutputStream().write("\u00bd-".getBytes(outputEncoding));
+            }
+            resp.flushBuffer();
+            RequestDispatcher rd =
+                    req.getRequestDispatcher("/bug49nnn/bug49464-" + 
includeTarget + ".txt");
+            rd.include(req, resp);
+            if (useWriter) {
+                PrintWriter pw = resp.getWriter();
+                pw.print("-\u00bd");
+            } else {
+                
resp.getOutputStream().write("-\u00bd".getBytes(outputEncoding));
+            }
+        }
+    }
+}
diff --git a/test/org/apache/catalina/servlets/TestDefaultServlet.java b/test/org/apache/catalina/servlets/TestDefaultServlet.java
index 0c1af97..e9a0503 100644
--- a/test/org/apache/catalina/servlets/TestDefaultServlet.java
+++ b/test/org/apache/catalina/servlets/TestDefaultServlet.java
@@ -41,6 +41,7 @@ import org.apache.catalina.startup.SimpleHttpClient;
 import org.apache.catalina.startup.Tomcat;
 import org.apache.catalina.startup.TomcatBaseTest;
 import org.apache.tomcat.util.buf.ByteChunk;
+import org.apache.tomcat.util.descriptor.web.ErrorPage;
 import org.apache.tomcat.websocket.server.WsContextListener;
 
 public class TestDefaultServlet extends TomcatBaseTest {
@@ -106,6 +107,7 @@ public class TestDefaultServlet extends TomcatBaseTest {
         Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
                 "org.apache.catalina.servlets.DefaultServlet");
         defaultServlet.addInitParameter("gzip", "true");
+        defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1");
         ctxt.addServletMappingDecoded("/", "default");
 
         ctxt.addMimeMapping("html", "text/html");
@@ -161,6 +163,7 @@ public class TestDefaultServlet extends TomcatBaseTest {
         Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
                 "org.apache.catalina.servlets.DefaultServlet");
         defaultServlet.addInitParameter("precompressed", "true");
+        defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1");
 
         ctxt.addServletMappingDecoded("/", "default");
         ctxt.addMimeMapping("html", "text/html");
@@ -271,6 +274,7 @@ public class TestDefaultServlet extends TomcatBaseTest {
         Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
                 DefaultServlet.class.getName());
         defaultServlet.addInitParameter("precompressed", "br=.br,gzip=.gz");
+        defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1");
 
         ctxt.addServletMappingDecoded("/", "default");
         ctxt.addMimeMapping("html", "text/html");
@@ -441,36 +445,24 @@ public class TestDefaultServlet extends TomcatBaseTest {
      */
     @Test
     public void testCustomErrorPage() throws Exception {
-        File appDir = new File(getTemporaryDirectory(), "MyApp");
-        File webInf = new File(appDir, "WEB-INF");
-        addDeleteOnTearDown(appDir);
-        if (!webInf.mkdirs() && !webInf.isDirectory()) {
-            Assert.fail("Unable to create directory [" + webInf + "]");
-        }
 
-        File webxml = new File(appDir, "WEB-INF/web.xml");
-        try (FileOutputStream fos = new FileOutputStream(webxml);
-                Writer w = new OutputStreamWriter(fos, "UTF-8");) {
-            w.write("<?xml version='1.0' encoding='UTF-8'?>\n"
-                    + "<web-app xmlns='http://java.sun.com/xml/ns/j2ee' "
-                    + " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'"
-                    + " xsi:schemaLocation='http://java.sun.com/xml/ns/j2ee "
-                    + " http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd'"
-                    + " version='2.4'>\n"
-                    + "<error-page>\n<error-code>404</error-code>\n"
-                    + "<location>/404.html</location>\n</error-page>\n"
-                    + "</web-app>\n");
-        }
+        Tomcat tomcat = getTomcatInstance();
 
-        File error404 = new File(appDir, "404.html");
-        try (FileOutputStream fos = new FileOutputStream(error404);
-                Writer w = new OutputStreamWriter(fos, "ISO-8859-1")) {
-            w.write("It is 404.html");
-        }
+        File appDir = new File("test/webapp");
+
+        // app dir is relative to server home
+        Context ctxt = tomcat.addContext("", appDir.getAbsolutePath());
+        Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default",
+                DefaultServlet.class.getName());
+        defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1");
+
+        ctxt.addServletMappingDecoded("/", "default");
+        ctxt.addMimeMapping("html", "text/html");
+        ErrorPage ep = new ErrorPage();
+        ep.setErrorCode(404);
+        ep.setLocation("/404.html");
+        ctxt.addErrorPage(ep);
 
-        Tomcat tomcat = getTomcatInstance();
-        String contextPath = "/MyApp";
-        tomcat.addWebapp(null, contextPath, appDir.getAbsolutePath());
         tomcat.start();
 
         TestCustomErrorClient client =
@@ -532,7 +524,7 @@ public class TestDefaultServlet extends TomcatBaseTest {
 
         File webxml = new File(appDir, "WEB-INF/web.xml");
         try (FileOutputStream fos = new FileOutputStream(webxml);
-                Writer w = new OutputStreamWriter(fos, "UTF-8");) {
+                Writer w = new OutputStreamWriter(fos, "UTF-8")) {
             w.write("<?xml version='1.0' encoding='UTF-8'?>\n"
                     + "<web-app xmlns='http://java.sun.com/xml/ns/j2ee' "
                     + " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'"
diff --git 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
new file mode 100644
index 0000000..46bd78f
--- /dev/null
+++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.servlets;
+
+
+public class TestDefaultServletEncodingWithBom extends 
DefaultServletEncodingBaseTest {
+
+    @Override
+    protected boolean getUseBom() { // Variant of the base test with BOM use enabled; presumably feeds useBomIfPresent - confirm in DefaultServletEncodingBaseTest
+        return true;
+    }
+}
diff --git 
a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
new file mode 100644
index 0000000..9deec1d
--- /dev/null
+++ 
b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.servlets;
+
+
+public class TestDefaultServletEncodingWithoutBom extends 
DefaultServletEncodingBaseTest {
+
+    @Override
+    protected boolean getUseBom() { // Variant of the base test with BOM use disabled; presumably feeds useBomIfPresent - confirm in DefaultServletEncodingBaseTest
+        return false;
+    }
+}
diff --git a/test/webapp/404.html b/test/webapp/404.html
new file mode 100644
index 0000000..50c55f6
--- /dev/null
+++ b/test/webapp/404.html
@@ -0,0 +1 @@
+It is 404.html
\ No newline at end of file
diff --git a/test/webapp/bug49nnn/bug49464-cp1252.txt 
b/test/webapp/bug49nnn/bug49464-cp1252.txt
new file mode 100644
index 0000000..9c95a6b
--- /dev/null
+++ b/test/webapp/bug49nnn/bug49464-cp1252.txt
@@ -0,0 +1 @@
+½
\ No newline at end of file
diff --git a/test/webapp/bug49nnn/bug49464-ibm850.txt 
b/test/webapp/bug49nnn/bug49464-ibm850.txt
new file mode 100644
index 0000000..f982586
--- /dev/null
+++ b/test/webapp/bug49nnn/bug49464-ibm850.txt
@@ -0,0 +1 @@
+½
\ No newline at end of file
diff --git a/test/webapp/bug49nnn/bug49464-iso-8859-1.txt 
b/test/webapp/bug49nnn/bug49464-iso-8859-1.txt
new file mode 100644
index 0000000..9c95a6b
--- /dev/null
+++ b/test/webapp/bug49nnn/bug49464-iso-8859-1.txt
@@ -0,0 +1 @@
+½
\ No newline at end of file
diff --git a/test/webapp/bug49nnn/bug49464-utf-8-bom.txt 
b/test/webapp/bug49nnn/bug49464-utf-8-bom.txt
new file mode 100644
index 0000000..6ee21c1
--- /dev/null
+++ b/test/webapp/bug49nnn/bug49464-utf-8-bom.txt
@@ -0,0 +1 @@
+½
\ No newline at end of file
diff --git a/test/webapp/bug49nnn/bug49464-utf-8.txt 
b/test/webapp/bug49nnn/bug49464-utf-8.txt
new file mode 100644
index 0000000..66f5eb7
--- /dev/null
+++ b/test/webapp/bug49nnn/bug49464-utf-8.txt
@@ -0,0 +1 @@
+½
\ No newline at end of file
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 67ffece..8328fb2 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -65,6 +65,11 @@
         rather than fails (with a 500 response). This enables Tomcat to pass two
         additional tests from the Litmus WebDAV test suite. (markt)
       </fix>
+      <fix>
+        <bug>49464</bug>: Improve the Default Servlet's handling of static files
+        when the file encoding is not compatible with the required response
+        encoding. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Jasper">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to