Hi Mark,

On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:

> Author: markt
> Date: Thu Dec 15 21:34:06 2016
> New Revision: 1774526
>
> URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
> Log:
> Add a new encoding detector implementation.
> The BoM encoding detection is based on the previous code.
> The prolog encoding detection delegates to the JRE's XML parser rather
> than the custom Jasper parser.
>
> Added:
>     tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
>  (with props)
> Modified:
>     tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
>
> Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> (added)
> +++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
> Thu Dec 15 21:34:06 2016
> @@ -0,0 +1,214 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.jasper.compiler;
> +
> +import java.io.BufferedInputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +
> +import javax.xml.stream.XMLInputFactory;
> +import javax.xml.stream.XMLStreamException;
> +import javax.xml.stream.XMLStreamReader;
> +
> +/*
> + * The BoM detection is derived from:
> + * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/
> apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
> + */
> +class EncodingDetector {
> +
> +    private static final XMLInputFactory XML_INPUT_FACTORY;
> +    static {
> +        XML_INPUT_FACTORY = XMLInputFactory.newFactory();
> +    }
> +
> +    private final BomResult bomResult;
> +    private final String prologEncoding;
> +
> +
> +    /*
> +     * TODO: Refactor Jasper InputStream creation and handling so the
> +     *       InputStream passed to this method is buffered and therefore
> saves
> +     *       on multiple opening and re-opening of the same file.
> +     */
> +    EncodingDetector(InputStream is) throws IOException {
> +        // Keep buffer size to a minimum here. BoM will be no more than 4
> bytes
> +        // so that is the maximum we need to buffer
> +        BufferedInputStream bis = new BufferedInputStream(is, 4);
> +        bis.mark(4);
> +
> +        bomResult = processBom(bis);
> +
> +        // Reset the stream back to the start to allow the XML prolog
> detection
> +        // to work. Skip any BoM we discovered.
> +        bis.reset();
> +        if (bomResult != null) {
> +            for (int i = 0; i < bomResult.skip; i++) {
> +                is.read();
> +            }
> +        }
> +
> +        prologEncoding = getPrologEncoding(bis);
> +    }
> +
> +
> +    String getBomEncoding() {
> +        return bomResult.encoding;
> +    }
> +
> +
> +    Boolean getBigEndian() {
> +        return bomResult.bigEndian;
> +    }
> +
> +
> +    int getSkip() {
> +        return bomResult.skip;
> +    }
> +
> +
> +    String getPrologEncoding() {
> +        return prologEncoding;
> +    }
> +
> +
> +    private String getPrologEncoding(InputStream stream) {
> +        String encoding = null;
> +        try {
> +            XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.
> createXMLStreamReader(stream);
> +            encoding = xmlStreamReader.getCharacterEncodingScheme();
> +        } catch (XMLStreamException e) {
> +            // Ignore
> +        }
> +        return encoding;
> +    }
> +
> +
> +    private BomResult processBom(InputStream stream) {
> +        // Read first four bytes (or as many are available) and determine
> +        // encoding
> +        try {
> +            final byte[] b4 = new byte[4];
> +            int count = 0;
> +            int singleByteRead;
> +            while (count < 4) {
> +                singleByteRead = stream.read();
> +                if (singleByteRead == -1) {
> +                    break;
> +                }
> +                b4[count] = (byte) singleByteRead;
> +                count++;
> +            }
> +
> +            return parseBom(b4, count);
> +        } catch (IOException ioe) {
> +            // Failed.
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +    }
> +
> +
> +    private BomResult parseBom(byte[] b4, int count) {
> +
> +        if (count < 2) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // UTF-16, with BOM
> +        int b0 = b4[0] & 0xFF;
> +        int b1 = b4[1] & 0xFF;
> +        if (b0 == 0xFE && b1 == 0xFF) {
> +            // UTF-16, big-endian
> +            return new BomResult("UTF-16BE", Boolean.TRUE, 2);
> +        }
> +        if (b0 == 0xFF && b1 == 0xFE) {
> +            // UTF-16, little-endian
> +            return new BomResult("UTF-16LE", Boolean.FALSE, 2);
> +        }
> +
> +        // default to UTF-8 if we don't have enough bytes to make a
> +        // good determination of the encoding
> +        if (count < 3) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // UTF-8 with a BOM
> +        int b2 = b4[2] & 0xFF;
> +        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
> +            return new BomResult("UTF-8", null, 3);
> +        }
> +
> +        // default to UTF-8 if we don't have enough bytes to make a
> +        // good determination of the encoding
> +        if (count < 4) {
> +            return new BomResult("UTF-8", null,  0);
> +        }
> +
> +        // other encodings
> +        int b3 = b4[3] & 0xFF;
> +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
> +            // UCS-4, big endian (1234)
> +            return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
> +        }
> +        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
> +            // UCS-4, little endian (4321)
> +            return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
> +            // UCS-4, unusual octet order (2143)
> +            // REVISIT: What should this be?
> +            return new BomResult("ISO-10646-UCS-4", null, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
> +            // UCS-4, unusual octet order (3412)
> +            // REVISIT: What should this be?
> +            return new BomResult("ISO-10646-UCS-4", null, 4);
> +        }
> +        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
> +            // UTF-16, big-endian, no BOM
> +            // (or could turn out to be UCS-2...
> +            // REVISIT: What should this be?
> +            return new BomResult("UTF-16BE", Boolean.TRUE, 4);
> +        }
> +        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
> +            // UTF-16, little-endian, no BOM
> +            // (or could turn out to be UCS-2...
> +            return new BomResult("UTF-16LE", Boolean.FALSE, 4);
> +        }
> +        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
> +            // EBCDIC
> +            // a la xerces1, return CP037 instead of EBCDIC here
> +            return new BomResult("CP037", null, 4);
> +        }
> +
> +        // default encoding
> +        return new BomResult("UTF-8", null,  0);
> +    }
> +
> +
> +    private static class BomResult {
> +
> +        public final String encoding;
> +        public final Boolean bigEndian;
> +        public final int skip;
> +
> +        public BomResult(String encoding,  Boolean bigEndian, int skip) {
> +            this.encoding = encoding;
> +            this.bigEndian = bigEndian;
> +            this.skip = skip;
> +        }
> +    }
> +}
>
> Propchange: tomcat/trunk/java/org/apache/jasper/compiler/
> EncodingDetector.java
> ------------------------------------------------------------
> ------------------
>     svn:eol-style = native
>
> Modified: tomcat/trunk/java/org/apache/jasper/security/
> SecurityClassLoad.java
> URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/
> jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&
> view=diff
> ============================================================
> ==================
> --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> (original)
> +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
> Thu Dec 15 21:34:06 2016
> @@ -39,6 +39,10 @@ public final class SecurityClassLoad {
>
>          final String basePackage = "org.apache.jasper.";
>          try {
> +            // Ensure XMLInputFactory is loaded with Tomcat's class loader
> +            loader.loadClass( basePackage +
> +                    "comppiler.EncodingDetector");
>

There is one 'p' too many in "comppiler.EncodingDetector".


> +
>              loader.loadClass( basePackage +
>                  "runtime.JspFactoryImpl$PrivilegedGetPageContext");
>              loader.loadClass( basePackage +
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
> For additional commands, e-mail: dev-h...@tomcat.apache.org
>
>

Reply via email to