Author: markt
Date: Thu Dec 15 21:34:06 2016
New Revision: 1774526

URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
Log:
Add a new encoding detector implementation.
The BoM encoding detection is based on the previous code.
The prolog encoding detection delegates to the JRE's XML parser rather than 
the custom Jasper parser.

Added:
    tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java   (with 
props)
Modified:
    tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java

Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java Thu Dec 
15 21:34:06 2016
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jasper.compiler;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/*
+ * The BoM detection is derived from:
+ * 
http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
+ */
+class EncodingDetector {
+
+    /*
+     * Factory used to create the stream reader that reads the XML prolog.
+     * Created once, when this class is initialised.
+     */
+    private static final XMLInputFactory XML_INPUT_FACTORY;
+    static {
+        XML_INPUT_FACTORY = XMLInputFactory.newFactory();
+    }
+
+    // Outcome of examining the first bytes of the stream. Never null.
+    private final BomResult bomResult;
+    // Encoding reported by the XML parser for the prolog. Null if the parser
+    // failed or reported no encoding.
+    private final String prologEncoding;
+
+
+    /*
+     * Examines up to the first four bytes of the stream to determine the BoM
+     * based encoding and then passes the stream, with any detected BoM
+     * skipped, to the XML parser to obtain the encoding from the prolog.
+     *
+     * The stream is consumed by this constructor and is not closed.
+     *
+     * TODO: Refactor Jasper InputStream creation and handling so the
+     *       InputStream passed to this method is buffered and therefore saves
+     *       on multiple opening and re-opening of the same file.
+     */
+    EncodingDetector(InputStream is) throws IOException {
+        // Keep buffer size to a minimum here. BoM will be no more than 4 bytes
+        // so that is the maximum we need to buffer
+        BufferedInputStream bis = new BufferedInputStream(is, 4);
+        bis.mark(4);
+
+        bomResult = processBom(bis);
+
+        // Reset the stream back to the start to allow the XML prolog detection
+        // to work. Skip any BoM we discovered.
+        // The skip has to be performed on the buffered stream. The bytes to
+        // skip are held in its buffer so reading from the wrapped stream
+        // would leave the BoM in place and discard content that follows it.
+        bis.reset();
+        for (int i = 0; i < bomResult.skip; i++) {
+            bis.read();
+        }
+
+        prologEncoding = getPrologEncoding(bis);
+    }
+
+
+    /*
+     * Returns the encoding derived from the first bytes of the stream. This
+     * is "UTF-8" when nothing more specific was recognised.
+     */
+    String getBomEncoding() {
+        return bomResult.encoding;
+    }
+
+
+    /*
+     * Returns TRUE for big-endian, FALSE for little-endian and null when the
+     * detected encoding did not determine a byte order.
+     */
+    Boolean getBigEndian() {
+        return bomResult.bigEndian;
+    }
+
+
+    /*
+     * Returns the number of leading bytes that were skipped before the
+     * prolog detection was performed.
+     */
+    int getSkip() {
+        return bomResult.skip;
+    }
+
+
+    /*
+     * Returns the encoding obtained from the XML prolog or null if none was
+     * obtained.
+     */
+    String getPrologEncoding() {
+        return prologEncoding;
+    }
+
+
+    /*
+     * Asks the JRE's XML parser for the encoding declared in the prolog.
+     * Returns null if the parser cannot process the stream. Content that is
+     * not XML is expected here so parser failures are deliberately not
+     * reported.
+     */
+    private String getPrologEncoding(InputStream stream) {
+        String encoding = null;
+        XMLStreamReader xmlStreamReader = null;
+        try {
+            xmlStreamReader =
+                    XML_INPUT_FACTORY.createXMLStreamReader(stream);
+            encoding = xmlStreamReader.getCharacterEncodingScheme();
+        } catch (XMLStreamException ignored) {
+            // Ignore. No prolog encoding is available.
+        } finally {
+            // Release the parser's resources. This does not close the
+            // underlying stream.
+            if (xmlStreamReader != null) {
+                try {
+                    xmlStreamReader.close();
+                } catch (XMLStreamException ignored) {
+                    // Ignore. Nothing useful can be done at this point.
+                }
+            }
+        }
+        return encoding;
+    }
+
+
+    /*
+     * Reads up to four bytes from the stream and derives the encoding from
+     * them. If reading fails, the default result (UTF-8, no byte order,
+     * nothing to skip) is returned. Never returns null.
+     */
+    private BomResult processBom(InputStream stream) {
+        // Read first four bytes (or as many are available) and determine
+        // encoding
+        try {
+            final byte[] b4 = new byte[4];
+            int count = 0;
+            int singleByteRead;
+            while (count < 4) {
+                singleByteRead = stream.read();
+                if (singleByteRead == -1) {
+                    break;
+                }
+                b4[count] = (byte) singleByteRead;
+                count++;
+            }
+
+            return parseBom(b4, count);
+        } catch (IOException ioe) {
+            // Failed.
+            return new BomResult("UTF-8", null,  0);
+        }
+    }
+
+
+    /*
+     * Maps the first count bytes of b4 to an encoding, a byte order and the
+     * number of bytes to skip. Only the first count bytes of b4 are valid.
+     * Falls back to UTF-8 with nothing to skip when the bytes are too few or
+     * match no known pattern.
+     */
+    private BomResult parseBom(byte[] b4, int count) {
+
+        if (count < 2) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // UTF-16, with BOM
+        int b0 = b4[0] & 0xFF;
+        int b1 = b4[1] & 0xFF;
+        if (b0 == 0xFE && b1 == 0xFF) {
+            // UTF-16, big-endian
+            return new BomResult("UTF-16BE", Boolean.TRUE, 2);
+        }
+        if (b0 == 0xFF && b1 == 0xFE) {
+            // UTF-16, little-endian
+            return new BomResult("UTF-16LE", Boolean.FALSE, 2);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 3) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // UTF-8 with a BOM
+        int b2 = b4[2] & 0xFF;
+        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+            return new BomResult("UTF-8", null, 3);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 4) {
+            return new BomResult("UTF-8", null,  0);
+        }
+
+        // other encodings
+        // NOTE(review): the patterns below appear to be the first characters
+        // of the content in the relevant encoding rather than a BoM. A skip
+        // of 4 means those bytes are skipped before prolog detection -
+        // confirm this is intended.
+        int b3 = b4[3] & 0xFF;
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
+            // UCS-4, big endian (1234)
+            return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, little endian (4321)
+            return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
+            // UCS-4, unusual octet order (2143)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, unusual octet order (3412)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+            // UTF-16, big-endian, no BOM
+            // (or could turn out to be UCS-2...
+            // REVISIT: What should this be?
+            return new BomResult("UTF-16BE", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+            // UTF-16, little-endian, no BOM
+            // (or could turn out to be UCS-2...
+            return new BomResult("UTF-16LE", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
+            // EBCDIC
+            // a la xerces1, return CP037 instead of EBCDIC here
+            return new BomResult("CP037", null, 4);
+        }
+
+        // default encoding
+        return new BomResult("UTF-8", null,  0);
+    }
+
+
+    /*
+     * Immutable holder for the outcome of the BoM detection.
+     */
+    private static class BomResult {
+
+        // Name of the detected (or default) encoding
+        public final String encoding;
+        // TRUE for big-endian, FALSE for little-endian, null if not determined
+        public final Boolean bigEndian;
+        // Number of leading bytes to skip
+        public final int skip;
+
+        public BomResult(String encoding,  Boolean bigEndian, int skip) {
+            this.encoding = encoding;
+            this.bigEndian = bigEndian;
+            this.skip = skip;
+        }
+    }
+}

Propchange: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
URL: 
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java 
(original)
+++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java Thu Dec 
15 21:34:06 2016
@@ -39,6 +39,10 @@ public final class SecurityClassLoad {
 
         final String basePackage = "org.apache.jasper.";
         try {
+            // Ensure XMLInputFactory is loaded with Tomcat's class loader
+            loader.loadClass( basePackage +
+                    "compiler.EncodingDetector");
+
             loader.loadClass( basePackage +
                 "runtime.JspFactoryImpl$PrivilegedGetPageContext");
             loader.loadClass( basePackage +



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to