This is an automated email from the ASF dual-hosted git repository. michaelo pushed a commit to branch DOXIA-716 in repository https://gitbox.apache.org/repos/asf/maven-doxia.git
commit e43b2001d52119a1f48815180effd2ce643809ae Author: Michael Osipov <micha...@apache.org> AuthorDate: Fri Dec 29 21:52:37 2023 +0100 [DOXIA-716] Update and unify XMLReader creation and configuration This closes #187 --- .../maven/doxia/parser/AbstractXmlParser.java | 6 +- .../org/apache/maven/doxia/util/XmlValidator.java | 117 +++++++++------------ .../apache/maven/doxia/util/XmlValidatorTest.java | 3 + .../maven/doxia/xsd/AbstractXmlValidator.java | 23 ++-- 4 files changed, 74 insertions(+), 75 deletions(-) diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java b/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java index d0d143d6..aa60cb52 100644 --- a/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java +++ b/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java @@ -114,7 +114,11 @@ public abstract class AbstractXmlParser extends AbstractParser implements XmlMar throw new ParseException("Error reading the model", e); } - new XmlValidator().validate(content); + XmlValidator validator = new XmlValidator(); + validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler()); + validator.setEntityResolver(new CachedFileEntityResolver()); + + validator.validate(content); src = new StringReader(content); } diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java b/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java index 34c0530d..709efc46 100644 --- a/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java +++ b/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java @@ -18,24 +18,26 @@ */ package org.apache.maven.doxia.util; -import javax.xml.XMLConstants; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; import java.io.IOException; import java.io.StringReader; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.maven.doxia.markup.XmlMarkup; -import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver; import org.apache.maven.doxia.parser.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; -import org.xml.sax.helpers.XMLReaderFactory; /** * A class to validate xml documents. @@ -45,18 +47,37 @@ import org.xml.sax.helpers.XMLReaderFactory; public class XmlValidator { private static final Logger LOGGER = LoggerFactory.getLogger(XmlValidator.class); - /** - * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*" - * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>. - */ - private static final Pattern PATTERN_DOCTYPE = Pattern.compile(".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*"); - - /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */ - private static final Pattern PATTERN_TAG = Pattern.compile(".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*"); - /** lazy xmlReader to validate xml content*/ private XMLReader xmlReader; + private boolean validate = true; + private DefaultHandler defaultHandler; + private EntityResolver entityResolver; + + public boolean isValidate() { + return validate; + } + + public void setValidate(boolean validate) { + this.validate = validate; + } + + public DefaultHandler getDefaultHandler() { + return defaultHandler; + } + + public void setDefaultHandler(DefaultHandler defaultHandler) { + this.defaultHandler = defaultHandler; + } + + public EntityResolver getEntityResolver() { + return entityResolver; + } + + public void setEntityResolver(EntityResolver entityResolver) { + this.entityResolver = entityResolver; + } + /** * Validate an XML content with SAX. * @@ -65,57 +86,42 @@ public class XmlValidator { */ public void validate(String content) throws ParseException { try { - // 1 if there's a doctype - boolean hasDoctype = false; - Matcher matcher = PATTERN_DOCTYPE.matcher(content); - if (matcher.find()) { - hasDoctype = true; - } - - // 2 check for an xmlns instance - boolean hasXsd = false; - matcher = PATTERN_TAG.matcher(content); - if (matcher.find()) { - String value = matcher.group(2); - - if (value.contains(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI)) { - hasXsd = true; - } - } - - // 3 validate content - getXmlReader(hasXsd && hasDoctype).parse(new InputSource(new StringReader(content))); - } catch (IOException | SAXException e) { + getXmlReader().parse(new InputSource(new StringReader(content))); + } catch (IOException | SAXException | ParserConfigurationException e) { throw new ParseException("Error validating the model", e); } } /** - * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>. * @return an xmlReader instance. * @throws SAXException if any + * @throws ParserConfigurationException */ - private XMLReader getXmlReader(boolean hasDtdAndXsd) throws SAXException { + public XMLReader getXmlReader() throws SAXException, ParserConfigurationException { if (xmlReader == null) { - MessagesErrorHandler errorHandler = new MessagesErrorHandler(); - - xmlReader = XMLReaderFactory.createXMLReader(); - xmlReader.setFeature("http://xml.org/sax/features/validation", true); - xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", true); - xmlReader.setFeature("http://apache.org/xml/features/validation/schema", true); - xmlReader.setErrorHandler(errorHandler); - xmlReader.setEntityResolver(new CachedFileEntityResolver()); + SAXParserFactory parserFactory = SAXParserFactory.newInstance(); + parserFactory.setNamespaceAware(true); + SAXParser parser = parserFactory.newSAXParser(); + // If both DTD and XSD are provided, force XSD + parser.setProperty( + "http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema"); + // Always force language-neutral exception messages for MessagesErrorHandler + parser.setProperty("http://apache.org/xml/properties/locale", Locale.ROOT); + xmlReader = parser.getXMLReader(); + xmlReader.setFeature("http://xml.org/sax/features/validation", isValidate()); + xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", isValidate()); + xmlReader.setFeature("http://apache.org/xml/features/validation/schema", isValidate()); + xmlReader.setErrorHandler(getDefaultHandler()); + xmlReader.setEntityResolver(getEntityResolver()); } - ((MessagesErrorHandler) xmlReader.getErrorHandler()).setHasDtdAndXsd(hasDtdAndXsd); - return xmlReader; } /** * Convenience class to beautify <code>SAXParseException</code> messages. */ - private static class MessagesErrorHandler extends DefaultHandler { + public static class MessagesErrorHandler extends DefaultHandler { private static final int TYPE_UNKNOWN = 0; private static final int TYPE_WARNING = 1; @@ -130,17 +136,6 @@ public class XmlValidator { private static final Pattern ELEMENT_TYPE_PATTERN = Pattern.compile("Element type \".*\" must be declared.", Pattern.DOTALL); - private boolean hasDtdAndXsd; - - private MessagesErrorHandler() {} - - /** - * @param hasDtdAndXsd the hasDtdAndXsd to set - */ - protected void setHasDtdAndXsd(boolean hasDtdAndXsd) { - this.hasDtdAndXsd = hasDtdAndXsd; - } - /** {@inheritDoc} */ @Override public void warning(SAXParseException e) throws SAXException { @@ -150,14 +145,6 @@ public class XmlValidator { /** {@inheritDoc} */ @Override public void error(SAXParseException e) throws SAXException { - // Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities - // like - // See http://xsd.stylusstudio.com/2001Nov/post08021.htm - if (!hasDtdAndXsd) { - processException(TYPE_ERROR, e); - return; - } - Matcher m = ELEMENT_TYPE_PATTERN.matcher(e.getMessage()); if (!m.find()) { processException(TYPE_ERROR, e); diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java b/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java index 1868ec1a..942d003a 100644 --- a/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java +++ b/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java @@ -19,6 +19,7 @@ package org.apache.maven.doxia.util; import org.apache.commons.io.IOUtils; +import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver; import org.codehaus.plexus.testing.PlexusTest; import org.codehaus.plexus.util.xml.XmlStreamReader; import org.junit.jupiter.api.Test; @@ -35,6 +36,8 @@ public class XmlValidatorTest { String xml = IOUtils.toString(new XmlStreamReader(this.getClass().getResourceAsStream("/test.xml"))); XmlValidator validator = new XmlValidator(); + validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler()); + validator.setEntityResolver(new CachedFileEntityResolver()); validator.validate(xml); } diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java b/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java index 7fac8ce4..4da6182b 100644 --- a/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java +++ b/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java @@ -18,6 +18,8 @@ */ package org.apache.maven.doxia.xsd; +import javax.xml.parsers.ParserConfigurationException; + import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; @@ -25,6 +27,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.maven.doxia.util.XmlValidator; import org.codehaus.plexus.testing.PlexusTest; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -38,7 +41,6 @@ import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; -import org.xml.sax.helpers.XMLReaderFactory; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.fail; @@ -157,17 +159,19 @@ public abstract class AbstractXmlValidator { private XMLReader getXMLReader() { if (xmlReader == null) { try { - xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); - xmlReader.setFeature("http://xml.org/sax/features/validation", validate); - xmlReader.setFeature("http://apache.org/xml/features/validation/schema", validate); - xmlReader.setErrorHandler(new MessagesErrorHandler()); - xmlReader.setEntityResolver(getEntityResolver()); + XmlValidator validator = new XmlValidator(); + validator.setValidate(validate); + validator.setDefaultHandler(new MessagesErrorHandler()); + validator.setEntityResolver(getEntityResolver()); + xmlReader = validator.getXmlReader(); } catch (SAXNotRecognizedException e) { fail("SAXNotRecognizedException: " + e.getMessage()); } catch (SAXNotSupportedException e) { fail("SAXNotSupportedException: " + e.getMessage()); } catch (SAXException e) { fail("SAXException: " + e.getMessage()); + } catch (ParserConfigurationException e) { + fail("ParserConfigurationException: " + e.getMessage()); } } @@ -185,10 +189,11 @@ public abstract class AbstractXmlValidator { private List<ErrorMessage> parseXML(String content) throws IOException, SAXException { String xmlContent = addNamespaces(content); - MessagesErrorHandler errorHandler = - (MessagesErrorHandler) getXMLReader().getErrorHandler(); + XMLReader xmlReader = getXMLReader(); + + MessagesErrorHandler errorHandler = (MessagesErrorHandler) xmlReader.getErrorHandler(); - getXMLReader().parse(new InputSource(new StringReader(xmlContent))); + xmlReader.parse(new InputSource(new StringReader(xmlContent))); return errorHandler.getMessages(); }