This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 8.5.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit 486e49e7c9e36c3adb93f1ec684c7d3d4a59fe69 Author: Mark Thomas <ma...@apache.org> AuthorDate: Mon Jun 24 13:38:12 2019 +0100 Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=49464 Improve the Default Servlet's handling of static files when the file encoding is not compatible with the required response encoding. --- conf/web.xml | 5 + java/org/apache/catalina/core/StandardContext.java | 12 +- .../apache/catalina/servlets/DefaultServlet.java | 237 ++++++++++++++--- .../servlets/DefaultServletEncodingBaseTest.java | 284 +++++++++++++++++++++ .../catalina/servlets/TestDefaultServlet.java | 48 ++-- .../TestDefaultServletEncodingWithBom.java | 26 ++ .../TestDefaultServletEncodingWithoutBom.java | 26 ++ test/webapp/404.html | 1 + test/webapp/bug49nnn/bug49464-cp1252.txt | 1 + test/webapp/bug49nnn/bug49464-ibm850.txt | 1 + test/webapp/bug49nnn/bug49464-iso-8859-1.txt | 1 + test/webapp/bug49nnn/bug49464-utf-8-bom.txt | 1 + test/webapp/bug49nnn/bug49464-utf-8.txt | 1 + webapps/docs/changelog.xml | 5 + 14 files changed, 585 insertions(+), 64 deletions(-) diff --git a/conf/web.xml b/conf/web.xml index 0a34838..4d207ee 100644 --- a/conf/web.xml +++ b/conf/web.xml @@ -48,6 +48,11 @@ <!-- fileEncoding Encoding to be used to read static resources --> <!-- [platform default] --> <!-- --> + <!-- useBomIfPresent If a static file contains a byte order mark --> + <!-- (BOM), should this be used to determine the --> + <!-- file encoding in preference to fileEncoding. --> + <!-- [true] --> + <!-- --> <!-- input Input buffer size (in bytes) when reading --> <!-- resources to be served. [2048] --> <!-- --> diff --git a/java/org/apache/catalina/core/StandardContext.java b/java/org/apache/catalina/core/StandardContext.java index e425076..c7803f2 100644 --- a/java/org/apache/catalina/core/StandardContext.java +++ b/java/org/apache/catalina/core/StandardContext.java @@ -882,7 +882,17 @@ public class StandardContext extends ContainerBase @Override public void setResponseCharacterEncoding(String responseEncoding) { - this.responseEncoding = responseEncoding; + /* + * This ensures that the context response encoding is represented by a + * unique String object. This enables the Default Servlet to + * differentiate between a Response using this default encoding and one + * that has been explicitly configured. + */ + if (responseEncoding == null) { + this.responseEncoding = null; + } else { + this.responseEncoding = new String(responseEncoding); + } } diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java b/java/org/apache/catalina/servlets/DefaultServlet.java index dd0bfbc..c99afe1 100644 --- a/java/org/apache/catalina/servlets/DefaultServlet.java +++ b/java/org/apache/catalina/servlets/DefaultServlet.java @@ -32,6 +32,8 @@ import java.io.Reader; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.util.ArrayList; @@ -76,6 +78,7 @@ import org.apache.catalina.util.IOTools; import org.apache.catalina.util.ServerInfo; import org.apache.catalina.util.URLEncoder; import org.apache.catalina.webresources.CachedResource; +import org.apache.tomcat.util.buf.B2CConverter; import org.apache.tomcat.util.http.ResponseUtil; import org.apache.tomcat.util.res.StringManager; import org.apache.tomcat.util.security.Escape; @@ -242,6 +245,12 @@ public class DefaultServlet extends HttpServlet { * the platform default is used. */ protected String fileEncoding = null; + private transient Charset fileEncodingCharset = null; + + /** + * If a file has a BOM, should that be used in preference to fileEncoding? + */ + private boolean useBomIfPresent = true; /** * Minimum size for sendfile usage in bytes. @@ -308,6 +317,20 @@ public class DefaultServlet extends HttpServlet { Integer.parseInt(getServletConfig().getInitParameter("sendfileSize")) * 1024; fileEncoding = getServletConfig().getInitParameter("fileEncoding"); + if (fileEncoding == null) { + fileEncodingCharset = Charset.defaultCharset(); + fileEncoding = fileEncodingCharset.name(); + } else { + try { + fileEncodingCharset = B2CConverter.getCharset(fileEncoding); + } catch (UnsupportedEncodingException e) { + throw new ServletException(e); + } + } + + if (getServletConfig().getInitParameter("useBomIfPresent") != null) + useBomIfPresent = Boolean.parseBoolean( + getServletConfig().getInitParameter("useBomIfPresent")); globalXsltFile = getServletConfig().getInitParameter("globalXsltFile"); contextXsltFile = getServletConfig().getInitParameter("contextXsltFile"); @@ -759,11 +782,11 @@ public class DefaultServlet extends HttpServlet { /** * Serve the specified resource, optionally including the data content. * - * @param request The servlet request we are processing - * @param response The servlet response we are creating - * @param content Should the content be included? - * @param encoding The encoding to use if it is necessary to access the - * source as characters rather than as bytes + * @param request The servlet request we are processing + * @param response The servlet response we are creating + * @param content Should the content be included? + * @param inputEncoding The encoding to use if it is necessary to access the + * source as characters rather than as bytes * * @exception IOException if an input/output error occurs * @exception ServletException if a servlet-specified error occurs @@ -771,7 +794,7 @@ public class DefaultServlet extends HttpServlet { protected void serveResource(HttpServletRequest request, HttpServletResponse response, boolean content, - String encoding) + String inputEncoding) throws IOException, ServletException { boolean serveContent = content; @@ -945,12 +968,7 @@ public class DefaultServlet extends HttpServlet { } catch (IllegalStateException e) { // If it fails, we try to get a Writer instead if we're // trying to serve a text file - if (!usingPrecompressedVersion && - ((contentType == null) || - (contentType.startsWith("text")) || - (contentType.endsWith("xml")) || - (contentType.contains("/javascript"))) - ) { + if (!usingPrecompressedVersion && isText(contentType)) { writer = response.getWriter(); // Cannot reliably serve partial content with a Writer ranges = FULL; @@ -975,6 +993,32 @@ public class DefaultServlet extends HttpServlet { ranges = FULL; } + String outputEncoding = response.getCharacterEncoding(); + Charset charset = B2CConverter.getCharset(outputEncoding); + boolean conversionRequired; + /* + * The test below deliberately uses != to compare two Strings. This is + * because the code is looking to see if the default character encoding + * has been returned because no explicit character encoding has been + * defined. There is no clean way of doing this via the Servlet API. It + * would be possible to add a Tomcat specific API but that would require + * quite a bit of code to get to the Tomcat specific request object that + * may have been wrapped. The != test is a (slightly hacky) quick way of + * doing this. + */ + boolean outputEncodingSpecified = + outputEncoding != org.apache.coyote.Constants.DEFAULT_BODY_CHARSET.name() && + outputEncoding != resources.getContext().getResponseCharacterEncoding(); + if (!usingPrecompressedVersion && isText(contentType) && outputEncodingSpecified && + !charset.equals(fileEncodingCharset)) { + conversionRequired = true; + // Conversion often results fewer/more/different bytes. + // That does not play nicely with range requests. + ranges = FULL; + } else { + conversionRequired = false; + } + if (resource.isDirectory() || isError || ( (ranges == null || ranges.isEmpty()) @@ -997,8 +1041,8 @@ public class DefaultServlet extends HttpServlet { log("DefaultServlet.serveFile: contentLength=" + contentLength); // Don't set a content length if something else has already - // written to the response. - if (contentWritten == 0) { + // written to the response or if conversion will be taking place + if (contentWritten == 0 && !conversionRequired) { response.setContentLengthLong(contentLength); } } @@ -1014,34 +1058,72 @@ public class DefaultServlet extends HttpServlet { // Output via a writer so can't use sendfile or write // content directly. if (resource.isDirectory()) { - renderResult = render(request, getPathPrefix(request), resource, encoding); + renderResult = render(request, getPathPrefix(request), resource, inputEncoding); } else { renderResult = resource.getInputStream(); + if (included) { + // Need to make sure any BOM is removed + if (!renderResult.markSupported()) { + renderResult = new BufferedInputStream(renderResult); + } + Charset bomCharset = processBom(renderResult); + if (bomCharset != null && useBomIfPresent) { + inputEncoding = bomCharset.name(); + } + } } - copy(renderResult, writer, encoding); + copy(renderResult, writer, inputEncoding); } else { - // Output is via an InputStream + // Output is via an OutputStream if (resource.isDirectory()) { - renderResult = render(request, getPathPrefix(request), resource, encoding); + renderResult = render(request, getPathPrefix(request), resource, inputEncoding); } else { - if (!checkSendfile(request, response, resource, contentLength, null)) { - // sendfile not possible so check if resource - // content is available directly via - // CachedResource. Do not want to call - // getContent() on other resource - // implementations as that could trigger loading - // the contents of a very large file into memory - byte[] resourceBody = null; - if (resource instanceof CachedResource) { - resourceBody = resource.getContent(); + // Output is content of resource + // Check to see if conversion is required + if (conversionRequired || included) { + // When including a file, we need to check for a BOM + // to determine if a conversion is required, so we + // might as well always convert + InputStream source = resource.getInputStream(); + if (!source.markSupported()) { + source = new BufferedInputStream(source); + } + Charset bomCharset = processBom(source); + if (bomCharset != null && useBomIfPresent) { + inputEncoding = bomCharset.name(); } - if (resourceBody == null) { - // Resource content not directly available, - // use InputStream - renderResult = resource.getInputStream(); + // Following test also ensures included resources + // are converted if an explicit output encoding was + // specified + if (outputEncodingSpecified) { + OutputStreamWriter osw = new OutputStreamWriter(ostream, charset); + PrintWriter pw = new PrintWriter(osw); + copy(source, pw, inputEncoding); + pw.flush(); } else { - // Use the resource content directly - ostream.write(resourceBody); + // Just included but no conversion + renderResult = source; + } + } else { + if (!checkSendfile(request, response, resource, contentLength, null)) { + // sendfile not possible so check if resource + // content is available directly via + // CachedResource. Do not want to call + // getContent() on other resource + // implementations as that could trigger loading + // the contents of a very large file into memory + byte[] resourceBody = null; + if (resource instanceof CachedResource) { + resourceBody = resource.getContent(); + } + if (resourceBody == null) { + // Resource content not directly available, + // use InputStream + renderResult = resource.getInputStream(); + } else { + // Use the resource content directly + ostream.write(resourceBody); + } } } } @@ -1114,6 +1196,91 @@ public class DefaultServlet extends HttpServlet { } } + + /* + * Code borrowed heavily from Jasper's EncodingDetector + */ + private static Charset processBom(InputStream is) throws IOException { + // Java supported character sets do not use BOMs longer than 4 bytes + byte[] bom = new byte[4]; + is.mark(bom.length); + + int count = is.read(bom); + + // BOMs are at least 2 bytes + if (count < 2) { + skip(is, 0); + return null; + } + + // Look for two byte BOMs + int b0 = bom[0] & 0xFF; + int b1 = bom[1] & 0xFF; + if (b0 == 0xFE && b1 == 0xFF) { + skip(is, 2); + return StandardCharsets.UTF_16BE; + } + // Delay the UTF_16LE check if there are more that 2 bytes since it + // overlaps with UTF-32LE. + if (count == 2 && b0 == 0xFF && b1 == 0xFE) { + skip(is, 2); + return StandardCharsets.UTF_16LE; + } + + // Remaining BOMs are at least 3 bytes + if (count < 3) { + skip(is, 0); + return null; + } + + // UTF-8 is only 3-byte BOM + int b2 = bom[2] & 0xFF; + if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { + skip(is, 3); + return StandardCharsets.UTF_8; + } + + if (count < 4) { + skip(is, 0); + return null; + } + + // Look for 4-byte BOMs + int b3 = bom[3] & 0xFF; + if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) { + return Charset.forName("UTF-32BE"); + } + if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) { + return Charset.forName("UTF-32LE"); + } + + // Now we can check for UTF16-LE. There is an assumption here that we + // won't see a UTF16-LE file with a BOM where the first real data is + // 0x00 0x00 + if (b0 == 0xFF && b1 == 0xFE) { + skip(is, 2); + return StandardCharsets.UTF_16LE; + } + + skip(is, 0); + return null; + } + + + private static void skip(InputStream is, int skip) throws IOException { + is.reset(); + while (skip-- > 0) { + is.read(); + } + } + + + private static boolean isText(String contentType) { + return contentType == null || contentType.startsWith("text") || + contentType.endsWith("xml") || contentType.contains("/javascript"); + } + + private boolean pathEndsWithCompressedExtension(String path) { for (CompressionFormat format : compressionFormats) { if (path.endsWith(format.extension)) { @@ -1434,7 +1601,7 @@ public class DefaultServlet extends HttpServlet { * Decide which way to render. HTML or XML. * * @param contextPath The path - * @param resource The resource + * @param resource The resource * @param encoding The encoding to use to process the readme (if any) * * @return the input stream with the rendered output diff --git a/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java new file mode 100644 index 0000000..6ff4849 --- /dev/null +++ b/test/org/apache/catalina/servlets/DefaultServletEncodingBaseTest.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.servlets; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; + +import org.apache.catalina.Context; +import org.apache.catalina.Wrapper; +import org.apache.catalina.startup.Tomcat; +import org.apache.catalina.startup.TomcatBaseTest; +import org.apache.tomcat.util.buf.B2CConverter; +import org.apache.tomcat.util.buf.ByteChunk; +import org.apache.tomcat.util.http.parser.MediaType; + +/* + * Note: This test is split using two base classes. This is because, as a single + * test, it takes so long to run it dominates the time taken to run the + * tests when running tests using multiple threads. For example, on a + * system with 12 cores, the tests take ~5 minutes per connector with this + * test as a single test and ~3.5 minutes per connector with this test + * split in two. + */ +@RunWith(Parameterized.class) +public abstract class DefaultServletEncodingBaseTest extends TomcatBaseTest { + + @Parameterized.Parameters(name = "{index}: contextEnc[{0}], fileEnc[{1}], target[{2}]," + + " useInclude[{3}], outputEnc[{4}], callSetCharacterEnc[{5}], useWriter[{6}]") + public static Collection<Object[]> parameters() { + + String[] encodings = new String[] { + "utf-8", "ibm850", "cp1252", "iso-8859-1" }; + + String[] targetFiles = new String[] { + "cp1252", "ibm850", "iso-8859-1", "utf-8-bom", "utf-8" }; + + Boolean[] booleans = new Boolean[] { Boolean.FALSE, Boolean.TRUE }; + + List<Object[]> parameterSets = new ArrayList<>(); + + for (String contextResponseEncoding : encodings) { + for (String fileEncoding : encodings) { + for (String targetFile : targetFiles) { + for (Boolean useInclude : booleans) { + if (useInclude.booleanValue()) { + for (String outputEncoding : encodings) { + for (Boolean callSetCharacterEncoding : booleans) { + for (Boolean useWriter : booleans) { + parameterSets.add(new Object[] { contextResponseEncoding, + fileEncoding, targetFile, + useInclude, outputEncoding, + callSetCharacterEncoding, useWriter }); + } + } + } + } else { + /* + * Not using include so ignore outputEncoding, + * callSetCharacterEncoding and useWriter + * + * Tests that do not use include are always expected to + * pass. + */ + String encoding = targetFile; + if (encoding.endsWith("-bom")) { + encoding = encoding.substring(0, encoding.length() - 4); + } + parameterSets.add(new Object[] { contextResponseEncoding, fileEncoding, + targetFile, useInclude, encoding, Boolean.FALSE, + Boolean.FALSE }); + } + } + } + } + } + + return parameterSets; + } + + + private static boolean getExpected(String fileEncoding, boolean useBom, String targetFile, + String outputEncoding, boolean callSetCharacterEncoding, boolean useWriter) { + if (useWriter || callSetCharacterEncoding) { + /* + * Using a writer or setting the output character encoding means the + * response will specify a character set. These cases therefore + * reduce to can the file be read with the correct encoding. + * (Assuming any BOM is always skipped in the included output.) + */ + if (targetFile.endsWith("-bom") && useBom || + targetFile.startsWith(fileEncoding) || + targetFile.equals("cp1252") && fileEncoding.equals("iso-8859-1") || + targetFile.equals("iso-8859-1") && fileEncoding.equals("cp1252")) { + return true; + } else { + return false; + } + } else if (!(targetFile.startsWith(outputEncoding) || + targetFile.equals("cp1252") && outputEncoding.equals("iso-8859-1") || + targetFile.equals("iso-8859-1") && outputEncoding.equals("cp1252"))) { + /* + * The non-writer use cases read the target file as bytes. These + * cases therefore reduce to can the bytes from the target file be + * included in the output without corruption? The character used in + * the tests has been chosen so that, apart from iso-8859-1 and + * cp1252, the bytes vary by character set. + * (Assuming any BOM is always skipped in the included output.) + */ + return false; + } else { + return true; + } + } + + + @Parameter(0) + public String contextResponseEncoding; + @Parameter(1) + public String fileEncoding; + @Parameter(2) + public String targetFile; + @Parameter(3) + public boolean useInclude; + @Parameter(4) + public String outputEncoding; + @Parameter(5) + public boolean callSetCharacterEncoding; + @Parameter(6) + public boolean useWriter; + + + protected abstract boolean getUseBom(); + + + @Test + public void testEncoding() throws Exception { + + boolean expectedPass = getExpected(fileEncoding, getUseBom(), targetFile, outputEncoding, + callSetCharacterEncoding, useWriter); + + Tomcat tomcat = getTomcatInstance(); + + File appDir = new File("test/webapp"); + Context ctxt = tomcat.addContext("", appDir.getAbsolutePath()); + ctxt.setResponseCharacterEncoding(contextResponseEncoding); + Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", DefaultServlet.class.getName()); + defaultServlet.addInitParameter("fileEncoding", fileEncoding); + defaultServlet.addInitParameter("useBomIfPresent", Boolean.toString(getUseBom())); + + ctxt.addServletMappingDecoded("/", "default"); + + if (useInclude) { + Tomcat.addServlet(ctxt, "include", new EncodingServlet( + outputEncoding, callSetCharacterEncoding, targetFile, useWriter)); + ctxt.addServletMappingDecoded("/include", "include"); + } + + tomcat.start(); + + final ByteChunk res = new ByteChunk(); + Map<String,List<String>> headers = new HashMap<>(); + + String target; + if (useInclude) { + target = "http://localhost:" + getPort() + "/include"; + } else { + target = "http://localhost:" + getPort() + "/bug49nnn/bug49464-" + targetFile + ".txt"; + } + int rc = getUrl(target, res, headers); + + Assert.assertEquals(HttpServletResponse.SC_OK, rc); + List<String> values = headers.get("Content-Type"); + if (values != null && values.size() == 1) { + MediaType mediaType = MediaType.parseMediaType(new StringReader(values.get(0))); + String charset = mediaType.getCharset(); + if (charset == null) { + res.setCharset(B2CConverter.getCharset(outputEncoding)); + } else { + res.setCharset(B2CConverter.getCharset(charset)); + } + } else { + res.setCharset(B2CConverter.getCharset(outputEncoding)); + } + String body = res.toString(); + /* + * Remove BOM before checking content + * BOM (should be) removed by Tomcat when file is included + */ + if (!useInclude && targetFile.endsWith("-bom")) { + body = body.substring(1); + } + + if (expectedPass) { + if (useInclude) { + Assert.assertEquals("\u00bd-\u00bd-\u00bd", body); + } else { + Assert.assertEquals("\u00bd", body); + } + } else { + if (useInclude) { + Assert.assertNotEquals("\u00bd-\u00bd-\u00bd", body); + } else { + Assert.assertNotEquals("\u00bd", body); + } + } + } + + + private static class EncodingServlet extends HttpServlet { + + private static final long serialVersionUID = 1L; + + private final String outputEncoding; + private final boolean callSetCharacterEncoding; + private final String includeTarget; + private final boolean useWriter; + + public EncodingServlet(String outputEncoding, boolean callSetCharacterEncoding, + String includeTarget, boolean useWriter) { + this.outputEncoding = outputEncoding; + this.callSetCharacterEncoding = callSetCharacterEncoding; + this.includeTarget = includeTarget; + this.useWriter = useWriter; + } + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + resp.setContentType("text/plain"); + if (callSetCharacterEncoding) { + resp.setCharacterEncoding(outputEncoding); + } + if (useWriter) { + PrintWriter pw = resp.getWriter(); + pw.print("\u00bd-"); + } else { + resp.getOutputStream().write("\u00bd-".getBytes(outputEncoding)); + } + resp.flushBuffer(); + RequestDispatcher rd = + req.getRequestDispatcher("/bug49nnn/bug49464-" + includeTarget + ".txt"); + rd.include(req, resp); + if (useWriter) { + PrintWriter pw = resp.getWriter(); + pw.print("-\u00bd"); + } else { + resp.getOutputStream().write("-\u00bd".getBytes(outputEncoding)); + } + } + } +} diff --git a/test/org/apache/catalina/servlets/TestDefaultServlet.java b/test/org/apache/catalina/servlets/TestDefaultServlet.java index 0c1af97..e9a0503 100644 --- a/test/org/apache/catalina/servlets/TestDefaultServlet.java +++ b/test/org/apache/catalina/servlets/TestDefaultServlet.java @@ -41,6 +41,7 @@ import org.apache.catalina.startup.SimpleHttpClient; import org.apache.catalina.startup.Tomcat; import org.apache.catalina.startup.TomcatBaseTest; import org.apache.tomcat.util.buf.ByteChunk; +import org.apache.tomcat.util.descriptor.web.ErrorPage; import org.apache.tomcat.websocket.server.WsContextListener; public class TestDefaultServlet extends TomcatBaseTest { @@ -106,6 +107,7 @@ public class TestDefaultServlet extends TomcatBaseTest { Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", "org.apache.catalina.servlets.DefaultServlet"); defaultServlet.addInitParameter("gzip", "true"); + defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1"); ctxt.addServletMappingDecoded("/", "default"); ctxt.addMimeMapping("html", "text/html"); @@ -161,6 +163,7 @@ public class TestDefaultServlet extends TomcatBaseTest { Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", "org.apache.catalina.servlets.DefaultServlet"); defaultServlet.addInitParameter("precompressed", "true"); + defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1"); ctxt.addServletMappingDecoded("/", "default"); ctxt.addMimeMapping("html", "text/html"); @@ -271,6 +274,7 @@ public class TestDefaultServlet extends TomcatBaseTest { Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", DefaultServlet.class.getName()); defaultServlet.addInitParameter("precompressed", "br=.br,gzip=.gz"); + defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1"); ctxt.addServletMappingDecoded("/", "default"); ctxt.addMimeMapping("html", "text/html"); @@ -441,36 +445,24 @@ public class TestDefaultServlet extends TomcatBaseTest { */ @Test public void testCustomErrorPage() throws Exception { - File appDir = new File(getTemporaryDirectory(), "MyApp"); - File webInf = new File(appDir, "WEB-INF"); - addDeleteOnTearDown(appDir); - if (!webInf.mkdirs() && !webInf.isDirectory()) { - Assert.fail("Unable to create directory [" + webInf + "]"); - } - File webxml = new File(appDir, "WEB-INF/web.xml"); - try (FileOutputStream fos = new FileOutputStream(webxml); - Writer w = new OutputStreamWriter(fos, "UTF-8");) { - w.write("<?xml version='1.0' encoding='UTF-8'?>\n" - + "<web-app xmlns='http://java.sun.com/xml/ns/j2ee' " - + " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'" - + " xsi:schemaLocation='http://java.sun.com/xml/ns/j2ee " - + " http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd'" - + " version='2.4'>\n" - + "<error-page>\n<error-code>404</error-code>\n" - + "<location>/404.html</location>\n</error-page>\n" - + "</web-app>\n"); - } + Tomcat tomcat = getTomcatInstance(); - File error404 = new File(appDir, "404.html"); - try (FileOutputStream fos = new FileOutputStream(error404); - Writer w = new OutputStreamWriter(fos, "ISO-8859-1")) { - w.write("It is 404.html"); - } + File appDir = new File("test/webapp"); + + // app dir is relative to server home + Context ctxt = tomcat.addContext("", appDir.getAbsolutePath()); + Wrapper defaultServlet = Tomcat.addServlet(ctxt, "default", + DefaultServlet.class.getName()); + defaultServlet.addInitParameter("fileEncoding", "ISO-8859-1"); + + ctxt.addServletMappingDecoded("/", "default"); + ctxt.addMimeMapping("html", "text/html"); + ErrorPage ep = new ErrorPage(); + ep.setErrorCode(404); + ep.setLocation("/404.html"); + ctxt.addErrorPage(ep); - Tomcat tomcat = getTomcatInstance(); - String contextPath = "/MyApp"; - tomcat.addWebapp(null, contextPath, appDir.getAbsolutePath()); tomcat.start(); TestCustomErrorClient client = @@ -532,7 +524,7 @@ public class TestDefaultServlet extends TomcatBaseTest { File webxml = new File(appDir, "WEB-INF/web.xml"); try (FileOutputStream fos = new FileOutputStream(webxml); - Writer w = new OutputStreamWriter(fos, "UTF-8");) { + Writer w = new OutputStreamWriter(fos, "UTF-8")) { w.write("<?xml version='1.0' encoding='UTF-8'?>\n" + "<web-app xmlns='http://java.sun.com/xml/ns/j2ee' " + " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'" diff --git a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java new file mode 100644 index 0000000..46bd78f --- /dev/null +++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithBom.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.servlets; + + +public class TestDefaultServletEncodingWithBom extends DefaultServletEncodingBaseTest { + + @Override + protected boolean getUseBom() { + return true; + } +} diff --git a/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java new file mode 100644 index 0000000..9deec1d --- /dev/null +++ b/test/org/apache/catalina/servlets/TestDefaultServletEncodingWithoutBom.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.servlets; + + +public class TestDefaultServletEncodingWithoutBom extends DefaultServletEncodingBaseTest { + + @Override + protected boolean getUseBom() { + return false; + } +} diff --git a/test/webapp/404.html b/test/webapp/404.html new file mode 100644 index 0000000..50c55f6 --- /dev/null +++ b/test/webapp/404.html @@ -0,0 +1 @@ +It is 404.html \ No newline at end of file diff --git a/test/webapp/bug49nnn/bug49464-cp1252.txt b/test/webapp/bug49nnn/bug49464-cp1252.txt new file mode 100644 index 0000000..9c95a6b --- /dev/null +++ b/test/webapp/bug49nnn/bug49464-cp1252.txt @@ -0,0 +1 @@ +� \ No newline at end of file diff --git a/test/webapp/bug49nnn/bug49464-ibm850.txt b/test/webapp/bug49nnn/bug49464-ibm850.txt new file mode 100644 index 0000000..f982586 --- /dev/null +++ b/test/webapp/bug49nnn/bug49464-ibm850.txt @@ -0,0 +1 @@ +� \ No newline at end of file diff --git a/test/webapp/bug49nnn/bug49464-iso-8859-1.txt b/test/webapp/bug49nnn/bug49464-iso-8859-1.txt new file mode 100644 index 0000000..9c95a6b --- /dev/null +++ b/test/webapp/bug49nnn/bug49464-iso-8859-1.txt @@ -0,0 +1 @@ +� \ No newline at end of file diff --git a/test/webapp/bug49nnn/bug49464-utf-8-bom.txt b/test/webapp/bug49nnn/bug49464-utf-8-bom.txt new file mode 100644 index 0000000..6ee21c1 --- /dev/null +++ b/test/webapp/bug49nnn/bug49464-utf-8-bom.txt @@ -0,0 +1 @@ +½ \ No newline at end of file diff --git a/test/webapp/bug49nnn/bug49464-utf-8.txt b/test/webapp/bug49nnn/bug49464-utf-8.txt new file mode 100644 index 0000000..66f5eb7 --- /dev/null +++ b/test/webapp/bug49nnn/bug49464-utf-8.txt @@ -0,0 +1 @@ +½ \ No newline at end of file diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 67ffece..8328fb2 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -65,6 +65,11 @@ rather than fails (with a 500 response). This enables Tomcat to pass two additional tests from the Litmus WebDAV test suite. (markt) </fix> + <fix> + <bug>49464</bug>: Improve the Default Servlet's handling of static files + when the file encoding is not compatible with the required response + encoding. (markt) + </fix> </changelog> </subsection> <subsection name="Jasper"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org