Author: ltheussl Date: Fri May 7 09:47:55 2010 New Revision: 942043 URL: http://svn.apache.org/viewvc?rev=942043&view=rev Log: extract validation routine into a separate class
Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=942043&r1=942042&r2=942043&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Fri May 7 09:47:55 2010 @@ -37,20 +37,19 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.xml.XMLConstants; - import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.params.HttpMethodParams; -import org.apache.maven.doxia.logging.Log; + import org.apache.maven.doxia.macro.MacroExecutionException; import org.apache.maven.doxia.markup.XmlMarkup; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributeSet; import org.apache.maven.doxia.util.HtmlTools; +import org.apache.maven.doxia.util.XmlValidator; import org.codehaus.plexus.util.FileUtils; import org.codehaus.plexus.util.IOUtil; @@ -62,10 +61,6 @@ import org.codehaus.plexus.util.xml.pull import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; -import org.xml.sax.XMLReader; -import 
org.xml.sax.helpers.DefaultHandler; -import org.xml.sax.helpers.XMLReaderFactory; /** * An abstract class that defines some convenience methods for <code>XML</code> parsers. @@ -96,15 +91,6 @@ public abstract class AbstractXmlParser private static final Pattern PATTERN_ENTITY_2 = Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" ); - /** - * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*" - * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>. - */ - private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + DOCTYPE_START + "([^>]*)>.*" ); - - /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */ - private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" ); - private boolean ignorableWhitespace; private boolean collapsibleWhitespace; @@ -115,9 +101,6 @@ public abstract class AbstractXmlParser private boolean validate = true; - /** lazy xmlReader to validate xml content*/ - private XMLReader xmlReader; - /** {...@inheritdoc} */ public void parse( Reader source, Sink sink ) throws ParseException @@ -137,7 +120,7 @@ public abstract class AbstractXmlParser throw new ParseException( "Error reading the model: " + e.getMessage(), e ); } - validate( content ); + new XmlValidator( getLog() ).validate( content ); source = new StringReader( content ); } @@ -592,82 +575,6 @@ public abstract class AbstractXmlParser // ---------------------------------------------------------------------- /** - * Validate an XML content with SAX. - * - * @param content a not null xml content - * @throws ParseException if any. 
- */ - private void validate( String content ) - throws ParseException - { - try - { - // 1 if there's a doctype - boolean hasDoctype = false; - Matcher matcher = PATTERN_DOCTYPE.matcher( content ); - if ( matcher.find() ) - { - hasDoctype = true; - } - - // 2 check for an xmlns instance - boolean hasXsd = false; - matcher = PATTERN_TAG.matcher( content ); - if ( matcher.find() ) - { - String value = matcher.group( 2 ); - - if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 ) - { - hasXsd = true; - } - } - - // 3 validate content if doctype or xsd - if ( hasDoctype || hasXsd ) - { - if ( getLog().isDebugEnabled() ) - { - getLog().debug( "Validating the content..." ); - } - getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) ); - } - } - catch ( IOException e ) - { - throw new ParseException( "Error validating the model: " + e.getMessage(), e ); - } - catch ( SAXException e ) - { - throw new ParseException( "Error validating the model: " + e.getMessage(), e ); - } - } - - /** - * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>. - * @return an xmlReader instance. - * @throws SAXException if any - */ - private XMLReader getXmlReader( boolean hasDtdAndXsd ) - throws SAXException - { - if ( xmlReader == null ) - { - MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() ); - - xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" ); - xmlReader.setFeature( "http://xml.org/sax/features/validation", true ); - xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true ); - xmlReader.setErrorHandler( errorHandler ); - xmlReader.setEntityResolver( new CachedFileEntityResolver() ); - } - - ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd ); - - return xmlReader; - } - - /** * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {...@link #entities}. 
* <br/> * By default, we exclude the default XML entities: &amp;, &lt;, &gt;, &quot; and &apos;. @@ -787,125 +694,6 @@ public abstract class AbstractXmlParser } /** - * Convenience class to beautify <code>SAXParseException</code> messages. - */ - static class MessagesErrorHandler - extends DefaultHandler - { - private static final int TYPE_UNKNOWN = 0; - - private static final int TYPE_WARNING = 1; - - private static final int TYPE_ERROR = 2; - - private static final int TYPE_FATAL = 3; - - /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */ - private static final Pattern ELEMENT_TYPE_PATTERN = - Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL ); - - private final Log log; - - private boolean hasDtdAndXsd; - - public MessagesErrorHandler( Log log ) - { - this.log = log; - } - - /** - * @param hasDtdAndXsd the hasDtdAndXsd to set - */ - protected void setHasDtdAndXsd( boolean hasDtdAndXsd ) - { - this.hasDtdAndXsd = hasDtdAndXsd; - } - - /** {...@inheritdoc} */ - public void warning( SAXParseException e ) - throws SAXException - { - processException( TYPE_WARNING, e ); - } - - /** {...@inheritdoc} */ - public void error( SAXParseException e ) - throws SAXException - { - // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities - // like - // See http://xsd.stylusstudio.com/2001Nov/post08021.htm - if ( !hasDtdAndXsd ) - { - processException( TYPE_ERROR, e ); - return; - } - - Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() ); - if ( !m.find() ) - { - processException( TYPE_ERROR, e ); - } - } - - /** {...@inheritdoc} */ - public void fatalError( SAXParseException e ) - throws SAXException - { - processException( TYPE_FATAL, e ); - } - - private void processException( int type, SAXParseException e ) - throws SAXException - { - StringBuffer message = new StringBuffer(); - - switch ( type ) - { - case TYPE_WARNING: - message.append( "Warning:" ); - break; - 
- case TYPE_ERROR: - message.append( "Error:" ); - break; - - case TYPE_FATAL: - message.append( "Fatal error:" ); - break; - - case TYPE_UNKNOWN: - default: - message.append( "Unknown:" ); - break; - } - - message.append( EOL ); - message.append( " Public ID: " + e.getPublicId() ).append( EOL ); - message.append( " System ID: " + e.getSystemId() ).append( EOL ); - message.append( " Line number: " + e.getLineNumber() ).append( EOL ); - message.append( " Column number: " + e.getColumnNumber() ).append( EOL ); - message.append( " Message: " + e.getMessage() ).append( EOL ); - - switch ( type ) - { - case TYPE_WARNING: - if ( log.isWarnEnabled() ) - { - log.warn( message.toString() ); - } - break; - - case TYPE_UNKNOWN: - case TYPE_ERROR: - case TYPE_FATAL: - default: - throw new SAXException( message.toString() ); - } - } - } - - /** * Implementation of the callback mechanism <code>EntityResolver</code>. * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>. */ Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java?rev=942043&view=auto ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java (added) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java Fri May 7 09:47:55 2010 @@ -0,0 +1,271 @@ +package org.apache.maven.doxia.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.io.IOException; +import java.io.StringReader; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.XMLConstants; + +import org.apache.maven.doxia.logging.Log; +import org.apache.maven.doxia.markup.XmlMarkup; +import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver; +import org.apache.maven.doxia.parser.ParseException; + +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.helpers.XMLReaderFactory; + +/** + * A class to validate xml documents. + * + * @version $Id$ + * @since 1.1.3 + */ +public class XmlValidator +{ + /** + * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*" + * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>. + */ + private static final Pattern PATTERN_DOCTYPE = Pattern.compile( ".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*" ); + + /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */ + private static final Pattern PATTERN_TAG = Pattern.compile( ".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" ); + + /** lazy xmlReader to validate xml content*/ + private XMLReader xmlReader; + + private Log logger; + + /** + * Constructor. + * + * @param log a logger, not null. 
+ */ + public XmlValidator( Log log ) + { + this.logger = log; + } + + /** + * Validate an XML content with SAX. + * + * @param content a not null xml content + * @throws ParseException if any. + */ + public void validate( String content ) + throws ParseException + { + try + { + // 1 if there's a doctype + boolean hasDoctype = false; + Matcher matcher = PATTERN_DOCTYPE.matcher( content ); + if ( matcher.find() ) + { + hasDoctype = true; + } + + // 2 check for an xmlns instance + boolean hasXsd = false; + matcher = PATTERN_TAG.matcher( content ); + if ( matcher.find() ) + { + String value = matcher.group( 2 ); + + if ( value.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1 ) + { + hasXsd = true; + } + } + + // 3 validate content if doctype or xsd + if ( hasDoctype || hasXsd ) + { + getLog().debug( "Validating the content..." ); + getXmlReader( hasXsd && hasDoctype ).parse( new InputSource( new StringReader( content ) ) ); + } + } + catch ( IOException e ) + { + throw new ParseException( "Error validating the model: " + e.getMessage(), e ); + } + catch ( SAXException e ) + { + throw new ParseException( "Error validating the model: " + e.getMessage(), e ); + } + } + + /** + * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>. + * @return an xmlReader instance. 
+ * @throws SAXException if any + */ + private XMLReader getXmlReader( boolean hasDtdAndXsd ) + throws SAXException + { + if ( xmlReader == null ) + { + MessagesErrorHandler errorHandler = new MessagesErrorHandler( getLog() ); + + xmlReader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" ); + xmlReader.setFeature( "http://xml.org/sax/features/validation", true ); + xmlReader.setFeature( "http://apache.org/xml/features/validation/schema", true ); + xmlReader.setErrorHandler( errorHandler ); + xmlReader.setEntityResolver( new CachedFileEntityResolver() ); + } + + ( (MessagesErrorHandler) xmlReader.getErrorHandler() ).setHasDtdAndXsd( hasDtdAndXsd ); + + return xmlReader; + } + + private Log getLog() + { + return logger; + } + + /** + * Convenience class to beautify <code>SAXParseException</code> messages. + */ + static class MessagesErrorHandler + extends DefaultHandler + { + private static final int TYPE_UNKNOWN = 0; + + private static final int TYPE_WARNING = 1; + + private static final int TYPE_ERROR = 2; + + private static final int TYPE_FATAL = 3; + + private static final String EOL = XmlMarkup.EOL; + + /** @see org/apache/xerces/impl/msg/XMLMessages.properties#MSG_ELEMENT_NOT_DECLARED */ + private static final Pattern ELEMENT_TYPE_PATTERN = + Pattern.compile( "Element type \".*\" must be declared.", Pattern.DOTALL ); + + private final Log log; + + private boolean hasDtdAndXsd; + + public MessagesErrorHandler( Log log ) + { + this.log = log; + } + + /** + * @param hasDtdAndXsd the hasDtdAndXsd to set + */ + protected void setHasDtdAndXsd( boolean hasDtdAndXsd ) + { + this.hasDtdAndXsd = hasDtdAndXsd; + } + + /** {@inheritDoc} */ + public void warning( SAXParseException e ) + throws SAXException + { + processException( TYPE_WARNING, e ); + } + + /** {@inheritDoc} */ + public void error( SAXParseException e ) + throws SAXException + { + // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify 
entities + // like &nbsp; + // See http://xsd.stylusstudio.com/2001Nov/post08021.htm + if ( !hasDtdAndXsd ) + { + processException( TYPE_ERROR, e ); + return; + } + + Matcher m = ELEMENT_TYPE_PATTERN.matcher( e.getMessage() ); + if ( !m.find() ) + { + processException( TYPE_ERROR, e ); + } + } + + /** {@inheritDoc} */ + public void fatalError( SAXParseException e ) + throws SAXException + { + processException( TYPE_FATAL, e ); + } + + private void processException( int type, SAXParseException e ) + throws SAXException + { + StringBuffer message = new StringBuffer(); + + switch ( type ) + { + case TYPE_WARNING: + message.append( "Warning:" ); + break; + + case TYPE_ERROR: + message.append( "Error:" ); + break; + + case TYPE_FATAL: + message.append( "Fatal error:" ); + break; + + case TYPE_UNKNOWN: + default: + message.append( "Unknown:" ); + break; + } + + message.append( EOL ); + message.append( " Public ID: " + e.getPublicId() ).append( EOL ); + message.append( " System ID: " + e.getSystemId() ).append( EOL ); + message.append( " Line number: " + e.getLineNumber() ).append( EOL ); + message.append( " Column number: " + e.getColumnNumber() ).append( EOL ); + message.append( " Message: " + e.getMessage() ).append( EOL ); + + final String logMessage = message.toString(); + + switch ( type ) + { + case TYPE_WARNING: + log.warn( logMessage ); + break; + + case TYPE_UNKNOWN: + case TYPE_ERROR: + case TYPE_FATAL: + default: + throw new SAXException( logMessage ); + } + } + } +}