Author: remm
Date: Wed Nov 1 19:50:14 2006
New Revision: 470216

URL: http://svn.apache.org/viewvc?view=rev&rev=470216
Log:
- i18n handling fixes (according to what the spec authors think is what should be done, at least).
Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/JspDocumentParser.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Node.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Parser.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/ParserController.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Validator.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/runtime/PageContextImpl.java tomcat/tc6.0.x/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/JspDocumentParser.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/JspDocumentParser.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/JspDocumentParser.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/JspDocumentParser.java Wed Nov 1 19:50:14 2006 @@ -143,7 +143,8 @@ boolean directivesOnly, String pageEnc, String jspConfigPageEnc, - boolean isEncodingSpecifiedInProlog) + boolean isEncodingSpecifiedInProlog, + boolean isBomPresent) throws JasperException { JspDocumentParser jspDocParser = @@ -158,6 +159,7 @@ dummyRoot.setJspConfigPageEncoding(jspConfigPageEnc); dummyRoot.setIsEncodingSpecifiedInProlog( isEncodingSpecifiedInProlog); + dummyRoot.setIsBomPresent(isBomPresent); jspDocParser.current = dummyRoot; if (parent == null) { jspDocParser.addInclude( Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Node.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Node.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Node.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Node.java Wed Nov 1 19:50:14 2006 @@ 
-464,6 +464,12 @@ private boolean isEncodingSpecifiedInProlog; /* + * Indicates whether an encoding has been explicitly specified in the + * page's bom. + */ + private boolean isBomPresent; + + /* * Constructor. */ Root(Mark start, Node parent, boolean isXmlSyntax) { @@ -525,6 +531,14 @@ public boolean isEncodingSpecifiedInProlog() { return isEncodingSpecifiedInProlog; + } + + public void setIsBomPresent(boolean isBom) { + isBomPresent = isBom; + } + + public boolean isBomPresent() { + return isBomPresent; } /** Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Parser.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Parser.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Parser.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Parser.java Wed Nov 1 19:50:14 2006 @@ -108,7 +108,7 @@ public static Node.Nodes parse(ParserController pc, JspReader reader, Node parent, boolean isTagFile, boolean directivesOnly, URL jarFileUrl, String pageEnc, String jspConfigPageEnc, - boolean isDefaultPageEncoding) throws JasperException { + boolean isDefaultPageEncoding, boolean isBomPresent) throws JasperException { Parser parser = new Parser(pc, reader, isTagFile, directivesOnly, jarFileUrl); @@ -117,6 +117,7 @@ root.setPageEncoding(pageEnc); root.setJspConfigPageEncoding(jspConfigPageEnc); root.setIsDefaultPageEncoding(isDefaultPageEncoding); + root.setIsBomPresent(isBomPresent); if (directivesOnly) { parser.parseTagFileDirectives(root); Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/ParserController.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/ParserController.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- 
tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/ParserController.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/ParserController.java Wed Nov 1 19:50:14 2006 @@ -61,6 +61,7 @@ private Stack baseDirStack = new Stack(); private boolean isEncodingSpecifiedInProlog; + private boolean isBomPresent; private String sourceEnc; @@ -159,6 +160,7 @@ Node.Nodes parsedPage = null; isEncodingSpecifiedInProlog = false; + isBomPresent = false; isDefaultPageEncoding = false; JarFile jarFile = getJarFile(jarFileUrl); @@ -174,7 +176,7 @@ compiler.getPageInfo().addDependant(absFileName); } - if (isXml && isEncodingSpecifiedInProlog) { + if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) { /* * Make sure the encoding explicitly specified in the XML * prolog (if any) matches that in the JSP config element @@ -183,7 +185,7 @@ */ if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc) && (!jspConfigPageEnc.startsWith("UTF-16") - || !sourceEnc.startsWith("UTF-16"))) { + || !sourceEnc.startsWith("UTF-16"))) { err.jspError("jsp.error.prolog_config_encoding_mismatch", sourceEnc, jspConfigPageEnc); } @@ -199,7 +201,8 @@ isTagFile, directiveOnly, sourceEnc, jspConfigPageEnc, - isEncodingSpecifiedInProlog); + isEncodingSpecifiedInProlog, + isBomPresent); } else { // Standard syntax InputStreamReader inStreamReader = null; @@ -212,7 +215,7 @@ parsedPage = Parser.parse(this, jspReader, parent, isTagFile, directiveOnly, jarFileUrl, sourceEnc, jspConfigPageEnc, - isDefaultPageEncoding); + isDefaultPageEncoding, isBomPresent); } finally { if (inStreamReader != null) { try { @@ -298,7 +301,7 @@ if (sourceEnc != null) { return; } - // We don't know the encoding + // We don't know the encoding, so use BOM to determine it sourceEnc = "ISO-8859-1"; } else { // XML syntax or unknown, (auto)detect encoding ... 
@@ -306,10 +309,13 @@ jarFile, ctxt, err); sourceEnc = (String) ret[0]; if (((Boolean) ret[1]).booleanValue()) { - isEncodingSpecifiedInProlog = true; + isEncodingSpecifiedInProlog = true; + } + if (((Boolean) ret[2]).booleanValue()) { + isBomPresent = true; } - if (!isXml && sourceEnc.equals("UTF-8")) { + if (!isXml && sourceEnc.equals("UTF-8")) { /* * We don't know if we're dealing with XML or standard syntax. * Therefore, we need to check to see if the page contains @@ -359,10 +365,11 @@ if (!isExternal) { jspReader.reset(startMark); if (hasJspRoot(jspReader)) { + if (revert) sourceEnc = "UTF-8"; isXml = true; - if (revert) sourceEnc = "UTF-8"; return; } else { + if (revert && isBomPresent) sourceEnc = "UTF-8"; isXml = false; } } @@ -373,15 +380,17 @@ * Determine the page encoding from the page directive, unless it's * specified via JSP config. */ - sourceEnc = jspConfigPageEnc; - if (sourceEnc == null) { - sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark); - if (sourceEnc == null) { - // Default to "ISO-8859-1" per JSP spec - sourceEnc = "ISO-8859-1"; - isDefaultPageEncoding = true; - } - } + if (sourceEnc == null) { + sourceEnc = jspConfigPageEnc; + if (sourceEnc == null) { + sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark); + if (sourceEnc == null) { + // Default to "ISO-8859-1" per JSP spec + sourceEnc = "ISO-8859-1"; + isDefaultPageEncoding = true; + } + } + } } /* Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Validator.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Validator.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Validator.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/compiler/Validator.java Wed Nov 1 19:50:14 2006 @@ -196,7 +196,8 @@ err.jspError(n, "jsp.error.page.multi.pageencoding"); // 'pageEncoding' can 
occur at most once per file pageEncodingSeen = true; - comparePageEncodings(value, n); + String actual = comparePageEncodings(value, n); + n.getRoot().setPageEncoding(actual); } else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) { if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) { pageInfo.setDeferredSyntaxAllowedAsLiteral(value, n, @@ -266,6 +267,7 @@ if (pageEncodingSeen) err.jspError(n, "jsp.error.tag.multi.pageencoding"); pageEncodingSeen = true; + compareTagEncodings(value, n); n.getRoot().setPageEncoding(value); } else if ("deferredSyntaxAllowedAsLiteral".equals(attr)) { if (pageInfo.getDeferredSyntaxAllowedAsLiteral() == null) { @@ -323,7 +325,7 @@ * * @throws JasperException in case of page encoding mismatch */ - private void comparePageEncodings(String pageDirEnc, + private String comparePageEncodings(String pageDirEnc, Node.PageDirective pageDir) throws JasperException { Node.Root root = pageDir.getRoot(); @@ -335,13 +337,16 @@ * pattern matches this page. Treat "UTF-16", "UTF-16BE", and * "UTF-16LE" as identical. */ - if (configEnc != null - && !pageDirEnc.equals(configEnc) - && (!pageDirEnc.startsWith("UTF-16") || !configEnc - .startsWith("UTF-16"))) { - err.jspError(pageDir, - "jsp.error.config_pagedir_encoding_mismatch", - configEnc, pageDirEnc); + if (configEnc != null) { + if (!pageDirEnc.equals(configEnc) + && (!pageDirEnc.startsWith("UTF-16") || !configEnc + .startsWith("UTF-16"))) { + err.jspError(pageDir, + "jsp.error.config_pagedir_encoding_mismatch", + configEnc, pageDirEnc); + } else { + return configEnc; + } } /* @@ -351,7 +356,7 @@ * declaration). Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as * identical. 
*/ - if (root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) { + if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) { String pageEnc = root.getPageEncoding(); if (!pageDirEnc.equals(pageEnc) && (!pageDirEnc.startsWith("UTF-16") || !pageEnc @@ -359,9 +364,47 @@ err.jspError(pageDir, "jsp.error.prolog_pagedir_encoding_mismatch", pageEnc, pageDirEnc); + } else { + return pageEnc; } } + + return pageDirEnc; } + + /* + * Compares page encodings specified in various places, and throws + * exception in case of page encoding mismatch. + * + * @param pageDirEnc The value of the pageEncoding attribute of the page + * directive @param pageDir The page directive node + * + * @throws JasperException in case of page encoding mismatch + */ + private void compareTagEncodings(String pageDirEnc, + Node.TagDirective pageDir) throws JasperException { + + Node.Root root = pageDir.getRoot(); + + /* + * Compare the 'pageEncoding' attribute of the page directive with + * the encoding specified in the XML prolog (only for XML syntax, + * and only if JSP document contains XML prolog with encoding + * declaration). Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as + * identical. 
+ */ + if ((root.isXmlSyntax() && root.isEncodingSpecifiedInProlog()) || root.isBomPresent()) { + String pageEnc = root.getPageEncoding(); + if (!pageDirEnc.equals(pageEnc) + && (!pageDirEnc.startsWith("UTF-16") || !pageEnc + .startsWith("UTF-16"))) { + err.jspError(pageDir, + "jsp.error.prolog_pagedir_encoding_mismatch", + pageEnc, pageDirEnc); + } + } + } + } /** Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/runtime/PageContextImpl.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/runtime/PageContextImpl.java?view=diff&rev=470216&r1=470215&r2=470216 ============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/runtime/PageContextImpl.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/runtime/PageContextImpl.java Wed Nov 1 19:50:14 2006 @@ -191,22 +191,22 @@ ((JspWriterImpl) out).flushBuffer(); } } catch (IOException ex) { - log.warn("Internal error flushing the buffer in release()"); - } - - servlet = null; - config = null; - context = null; - applicationContext = null; - elContext = null; - errorPageURL = null; - request = null; - response = null; - depth = -1; - baseOut.recycle(); - session = null; - - attributes.clear(); + IllegalStateException ise = new IllegalStateException("Internal error flushing the buffer in release()", ex); + throw ise; + } finally { + servlet = null; + config = null; + context = null; + applicationContext = null; + elContext = null; + errorPageURL = null; + request = null; + response = null; + depth = -1; + baseOut.recycle(); + session = null; + attributes.clear(); + } } public Object getAttribute(final String name) { Modified: tomcat/tc6.0.x/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java URL: http://svn.apache.org/viewvc/tomcat/tc6.0.x/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?view=diff&rev=470216&r1=470215&r2=470216 
============================================================================== --- tomcat/tc6.0.x/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java (original) +++ tomcat/tc6.0.x/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java Wed Nov 1 19:50:14 2006 @@ -43,6 +43,7 @@ private InputStream stream; private String encoding; private boolean isEncodingSetInProlog; + private boolean isBomPresent; private Boolean isBigEndian; private Reader reader; @@ -121,7 +122,8 @@ scanXMLDecl(); return new Object[] { this.encoding, - new Boolean(this.isEncodingSetInProlog) }; + new Boolean(this.isEncodingSetInProlog), + new Boolean(this.isBomPresent) }; } // stub method @@ -147,6 +149,11 @@ Object [] encodingDesc = getEncodingName(b4, count); encoding = (String)(encodingDesc[0]); isBigEndian = (Boolean)(encodingDesc[1]); + if (encodingDesc.length > 2) { + isBomPresent = (Boolean)(encodingDesc[2]); + } else { + isBomPresent = true; + } stream.reset(); // Special case UTF-8 files with BOM created by Microsoft @@ -278,7 +285,7 @@ private Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { - return new Object[]{"UTF-8", null}; + return new Object[]{"UTF-8", null, Boolean.FALSE}; } // UTF-16, with BOM @@ -286,17 +293,17 @@ int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian - return new Object [] {"UTF-16BE", new Boolean(true)}; + return new Object [] {"UTF-16BE", Boolean.TRUE}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian - return new Object [] {"UTF-16LE", new Boolean(false)}; + return new Object [] {"UTF-16LE", Boolean.FALSE}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { - return new Object [] {"UTF-8", null}; + return new Object [] {"UTF-8", null, Boolean.FALSE}; } // UTF-8 with a BOM @@ -349,7 +356,7 @@ } // default encoding - return new Object [] {"UTF-8", null}; + return new Object [] {"UTF-8", null, Boolean.FALSE}; } 
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]