Hi Mark, On Thu, Dec 15, 2016 at 10:34 PM, <ma...@apache.org> wrote:
> Author: markt > Date: Thu Dec 15 21:34:06 2016 > New Revision: 1774526 > > URL: http://svn.apache.org/viewvc?rev=1774526&view=rev > Log: > Add a new encoding detector implementation. > The BoM encoding detection is based in the previous code. > The prolog encoding detection delegates to the JRE's XML parser rather > than the custom Jasper parser. > > Added: > tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java > (with props) > Modified: > tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java > > Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java > URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/ > jasper/compiler/EncodingDetector.java?rev=1774526&view=auto > ============================================================ > ================== > --- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java > (added) > +++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java > Thu Dec 15 21:34:06 2016 > @@ -0,0 +1,214 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > +package org.apache.jasper.compiler; > + > +import java.io.BufferedInputStream; > +import java.io.IOException; > +import java.io.InputStream; > + > +import javax.xml.stream.XMLInputFactory; > +import javax.xml.stream.XMLStreamException; > +import javax.xml.stream.XMLStreamReader; > + > +/* > + * The BoM detection is derived from: > + * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/ > apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248 > + */ > +class EncodingDetector { > + > + private static final XMLInputFactory XML_INPUT_FACTORY; > + static { > + XML_INPUT_FACTORY = XMLInputFactory.newFactory(); > + } > + > + private final BomResult bomResult; > + private final String prologEncoding; > + > + > + /* > + * TODO: Refactor Jasper InputStream creation and handling so the > + * InputStream passed to this method is buffered and therefore > saves > + * on multiple opening and re-opening of the same file. > + */ > + EncodingDetector(InputStream is) throws IOException { > + // Keep buffer size to a minimum here. BoM will be no more than 4 > bytes > + // so that is the maximum we need to buffer > + BufferedInputStream bis = new BufferedInputStream(is, 4); > + bis.mark(4); > + > + bomResult = processBom(bis); > + > + // Reset the stream back to the start to allow the XML prolog > detection > + // to work. Skip any BoM we discovered. > + bis.reset(); > + if (bomResult != null) { > + for (int i = 0; i < bomResult.skip; i++) { > + is.read(); > + } > + } > + > + prologEncoding = getPrologEncoding(bis); > + } > + > + > + String getBomEncoding() { > + return bomResult.encoding; > + } > + > + > + Boolean getBigEndian() { > + return bomResult.bigEndian; > + } > + > + > + int getSkip() { > + return bomResult.skip; > + } > + > + > + String getPrologEncoding() { > + return prologEncoding; > + } > + > + > + private String getPrologEncoding(InputStream stream) { > + String encoding = null; > + try { > + XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY. 
> createXMLStreamReader(stream); > + encoding = xmlStreamReader.getCharacterEncodingScheme(); > + } catch (XMLStreamException e) { > + // Ignore > + } > + return encoding; > + } > + > + > + private BomResult processBom(InputStream stream) { > + // Read first four bytes (or as many are available) and determine > + // encoding > + try { > + final byte[] b4 = new byte[4]; > + int count = 0; > + int singleByteRead; > + while (count < 4) { > + singleByteRead = stream.read(); > + if (singleByteRead == -1) { > + break; > + } > + b4[count] = (byte) singleByteRead; > + count++; > + } > + > + return parseBom(b4, count); > + } catch (IOException ioe) { > + // Failed. > + return new BomResult("UTF-8", null, 0); > + } > + } > + > + > + private BomResult parseBom(byte[] b4, int count) { > + > + if (count < 2) { > + return new BomResult("UTF-8", null, 0); > + } > + > + // UTF-16, with BOM > + int b0 = b4[0] & 0xFF; > + int b1 = b4[1] & 0xFF; > + if (b0 == 0xFE && b1 == 0xFF) { > + // UTF-16, big-endian > + return new BomResult("UTF-16BE", Boolean.TRUE, 2); > + } > + if (b0 == 0xFF && b1 == 0xFE) { > + // UTF-16, little-endian > + return new BomResult("UTF-16LE", Boolean.FALSE, 2); > + } > + > + // default to UTF-8 if we don't have enough bytes to make a > + // good determination of the encoding > + if (count < 3) { > + return new BomResult("UTF-8", null, 0); > + } > + > + // UTF-8 with a BOM > + int b2 = b4[2] & 0xFF; > + if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { > + return new BomResult("UTF-8", null, 3); > + } > + > + // default to UTF-8 if we don't have enough bytes to make a > + // good determination of the encoding > + if (count < 4) { > + return new BomResult("UTF-8", null, 0); > + } > + > + // other encodings > + int b3 = b4[3] & 0xFF; > + if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { > + // UCS-4, big endian (1234) > + return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4); > + } > + if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { > + // 
UCS-4, little endian (4321) > + return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4); > + } > + if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { > + // UCS-4, unusual octet order (2143) > + // REVISIT: What should this be? > + return new BomResult("ISO-10646-UCS-4", null, 4); > + } > + if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { > + // UCS-4, unusual octect order (3412) > + // REVISIT: What should this be? > + return new BomResult("ISO-10646-UCS-4", null, 4); > + } > + if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { > + // UTF-16, big-endian, no BOM > + // (or could turn out to be UCS-2... > + // REVISIT: What should this be? > + return new BomResult("UTF-16BE", Boolean.TRUE, 4); > + } > + if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { > + // UTF-16, little-endian, no BOM > + // (or could turn out to be UCS-2... > + return new BomResult("UTF-16LE", Boolean.FALSE, 4); > + } > + if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { > + // EBCDIC > + // a la xerces1, return CP037 instead of EBCDIC here > + return new BomResult("CP037", null, 4); > + } > + > + // default encoding > + return new BomResult("UTF-8", null, 0); > + } > + > + > + private static class BomResult { > + > + public final String encoding; > + public final Boolean bigEndian; > + public final int skip; > + > + public BomResult(String encoding, Boolean bigEndian, int skip) { > + this.encoding = encoding; > + this.bigEndian = bigEndian; > + this.skip = skip; > + } > + } > +} > > Propchange: tomcat/trunk/java/org/apache/jasper/compiler/ > EncodingDetector.java > ------------------------------------------------------------ > ------------------ > svn:eol-style = native > > Modified: tomcat/trunk/java/org/apache/jasper/security/ > SecurityClassLoad.java > URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/ > jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526& > view=diff > 
============================================================ > ================== > --- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java > (original) > +++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java > Thu Dec 15 21:34:06 2016 > @@ -39,6 +39,10 @@ public final class SecurityClassLoad { > > final String basePackage = "org.apache.jasper."; > try { > + // Ensure XMLInputFactory is loaded with Tomcat's class loader > + loader.loadClass( basePackage + > + "comppiler.EncodingDetector"); > There is one 'p' too many in "comppiler.EncodingDetector" (it should be "compiler.EncodingDetector"). > + > loader.loadClass( basePackage + > "runtime.JspFactoryImpl$PrivilegedGetPageContext"); > loader.loadClass( basePackage + > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org > For additional commands, e-mail: dev-h...@tomcat.apache.org > >