This is an automated email from the ASF dual-hosted git repository.
markt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/master by this push:
new 8607e1a Improve BoM detection for rarely used UTF-32
8607e1a is described below
commit 8607e1a0d2c283e443ce1ba2ccfb55b1884a580e
Author: Mark Thomas <[email protected]>
AuthorDate: Tue May 14 14:50:43 2019 +0100
Improve BoM detection for rarely used UTF-32
Identified by Coverity Scan which reported unreachable code.
---
java/org/apache/catalina/servlets/DefaultServlet.java | 12 +++++++++++-
webapps/docs/changelog.xml | 5 +++++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java
b/java/org/apache/catalina/servlets/DefaultServlet.java
index 205d302..2e669d4 100644
--- a/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -1212,7 +1212,9 @@ public class DefaultServlet extends HttpServlet {
skip(is, 2);
return StandardCharsets.UTF_16BE;
}
- if (b0 == 0xFF && b1 == 0xFE) {
+ // Delay the UTF_16LE check if there are more than 2 bytes since it
+ // overlaps with UTF32-LE.
+ if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
skip(is, 2);
return StandardCharsets.UTF_16LE;
}
@@ -1244,6 +1246,14 @@ public class DefaultServlet extends HttpServlet {
return Charset.forName("UTF32-LE");
}
+ // Now we can check for UTF16-LE. There is an assumption here that we
+ // won't see a UTF16-LE file with a BOM where the first real data is
+ // 0x00 0x00
+ if (b0 == 0xFF && b1 == 0xFE) {
+ skip(is, 2);
+ return StandardCharsets.UTF_16LE;
+ }
+
skip(is, 0);
return null;
}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 92f2aa0..4f83bb2 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -63,6 +63,11 @@
Make a best efforts attempt to clean-up if a request fails during
processing due to an <code>OutOfMemoryException</code>. (markt)
</fix>
+ <fix>
+ Improve the BoM detection for static files handled by the default
+ servlet for the rarely used UTF-32 encodings. Identified by Coverity
+ Scan. (markt)
+ </fix>
</changelog>
</subsection>
<subsection name="Coyote">
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]