This is an automated email from the ASF dual-hosted git repository.

michaelo pushed a commit to branch DOXIA-716
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git

commit e43b2001d52119a1f48815180effd2ce643809ae
Author: Michael Osipov <micha...@apache.org>
AuthorDate: Fri Dec 29 21:52:37 2023 +0100

    [DOXIA-716] Update and unify XMLReader creation and configuration
    
    This closes #187
---
 .../maven/doxia/parser/AbstractXmlParser.java      |   6 +-
 .../org/apache/maven/doxia/util/XmlValidator.java  | 117 +++++++++------------
 .../apache/maven/doxia/util/XmlValidatorTest.java  |   3 +
 .../maven/doxia/xsd/AbstractXmlValidator.java      |  23 ++--
 4 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java b/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
index d0d143d6..aa60cb52 100644
--- a/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
+++ b/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
@@ -114,7 +114,11 @@ public abstract class AbstractXmlParser extends AbstractParser implements XmlMar
                 throw new ParseException("Error reading the model", e);
             }
 
-            new XmlValidator().validate(content);
+            XmlValidator validator = new XmlValidator();
+            validator.setDefaultHandler(new 
XmlValidator.MessagesErrorHandler());
+            validator.setEntityResolver(new CachedFileEntityResolver());
+
+            validator.validate(content);
 
             src = new StringReader(content);
         }
diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java b/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
index 34c0530d..709efc46 100644
--- a/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
+++ b/doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
@@ -18,24 +18,26 @@
  */
 package org.apache.maven.doxia.util;
 
-import javax.xml.XMLConstants;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.maven.doxia.markup.XmlMarkup;
-import 
org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
 import org.apache.maven.doxia.parser.ParseException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.helpers.XMLReaderFactory;
 
 /**
  * A class to validate xml documents.
@@ -45,18 +47,37 @@ import org.xml.sax.helpers.XMLReaderFactory;
 public class XmlValidator {
     private static final Logger LOGGER = 
LoggerFactory.getLogger(XmlValidator.class);
 
-    /**
-     * Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
-     * see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
-     */
-    private static final Pattern PATTERN_DOCTYPE = Pattern.compile(".*" + 
XmlMarkup.DOCTYPE_START + "([^>]*)>.*");
-
-    /** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
-    private static final Pattern PATTERN_TAG = 
Pattern.compile(".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*");
-
     /** lazy xmlReader to validate xml content*/
     private XMLReader xmlReader;
 
+    private boolean validate = true;
+    private DefaultHandler defaultHandler;
+    private EntityResolver entityResolver;
+
+    public boolean isValidate() {
+        return validate;
+    }
+
+    public void setValidate(boolean validate) {
+        this.validate = validate;
+    }
+
+    public DefaultHandler getDefaultHandler() {
+        return defaultHandler;
+    }
+
+    public void setDefaultHandler(DefaultHandler defaultHandler) {
+        this.defaultHandler = defaultHandler;
+    }
+
+    public EntityResolver getEntityResolver() {
+        return entityResolver;
+    }
+
+    public void setEntityResolver(EntityResolver entityResolver) {
+        this.entityResolver = entityResolver;
+    }
+
     /**
      * Validate an XML content with SAX.
      *
@@ -65,57 +86,42 @@ public class XmlValidator {
      */
     public void validate(String content) throws ParseException {
         try {
-            // 1 if there's a doctype
-            boolean hasDoctype = false;
-            Matcher matcher = PATTERN_DOCTYPE.matcher(content);
-            if (matcher.find()) {
-                hasDoctype = true;
-            }
-
-            // 2 check for an xmlns instance
-            boolean hasXsd = false;
-            matcher = PATTERN_TAG.matcher(content);
-            if (matcher.find()) {
-                String value = matcher.group(2);
-
-                if 
(value.contains(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI)) {
-                    hasXsd = true;
-                }
-            }
-
-            // 3 validate content
-            getXmlReader(hasXsd && hasDoctype).parse(new InputSource(new 
StringReader(content)));
-        } catch (IOException | SAXException e) {
+            getXmlReader().parse(new InputSource(new StringReader(content)));
+        } catch (IOException | SAXException | ParserConfigurationException e) {
             throw new ParseException("Error validating the model", e);
         }
     }
 
     /**
-     * @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
      * @return an xmlReader instance.
      * @throws SAXException if any
+     * @throws ParserConfigurationException
      */
-    private XMLReader getXmlReader(boolean hasDtdAndXsd) throws SAXException {
+    public XMLReader getXmlReader() throws SAXException, 
ParserConfigurationException {
         if (xmlReader == null) {
-            MessagesErrorHandler errorHandler = new MessagesErrorHandler();
-
-            xmlReader = XMLReaderFactory.createXMLReader();
-            xmlReader.setFeature("http://xml.org/sax/features/validation", true);
-            xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", true);
-            xmlReader.setFeature("http://apache.org/xml/features/validation/schema", true);
-            xmlReader.setErrorHandler(errorHandler);
-            xmlReader.setEntityResolver(new CachedFileEntityResolver());
+            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
+            parserFactory.setNamespaceAware(true);
+            SAXParser parser = parserFactory.newSAXParser();
+            // If both DTD and XSD are provided, force XSD
+            parser.setProperty(
+                    "http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
+            // Always force language-neutral exception messages for MessagesErrorHandler
+            parser.setProperty("http://apache.org/xml/properties/locale", Locale.ROOT);
+            xmlReader = parser.getXMLReader();
+            xmlReader.setFeature("http://xml.org/sax/features/validation", isValidate());
+            xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", isValidate());
+            xmlReader.setFeature("http://apache.org/xml/features/validation/schema", isValidate());
+            xmlReader.setErrorHandler(getDefaultHandler());
+            xmlReader.setEntityResolver(getEntityResolver());
         }
 
-        ((MessagesErrorHandler) 
xmlReader.getErrorHandler()).setHasDtdAndXsd(hasDtdAndXsd);
-
         return xmlReader;
     }
 
     /**
      * Convenience class to beautify <code>SAXParseException</code> messages.
      */
-    private static class MessagesErrorHandler extends DefaultHandler {
+    public static class MessagesErrorHandler extends DefaultHandler {
         private static final int TYPE_UNKNOWN = 0;
 
         private static final int TYPE_WARNING = 1;
@@ -130,17 +136,6 @@ public class XmlValidator {
         private static final Pattern ELEMENT_TYPE_PATTERN =
                 Pattern.compile("Element type \".*\" must be declared.", 
Pattern.DOTALL);
 
-        private boolean hasDtdAndXsd;
-
-        private MessagesErrorHandler() {}
-
-        /**
-         * @param hasDtdAndXsd the hasDtdAndXsd to set
-         */
-        protected void setHasDtdAndXsd(boolean hasDtdAndXsd) {
-            this.hasDtdAndXsd = hasDtdAndXsd;
-        }
-
         /** {@inheritDoc} */
         @Override
         public void warning(SAXParseException e) throws SAXException {
@@ -150,14 +145,6 @@ public class XmlValidator {
         /** {@inheritDoc} */
         @Override
         public void error(SAXParseException e) throws SAXException {
-            // Workaround for Xerces complaints when an XML with XSD needs 
also a <!DOCTYPE []> to specify entities
-            // like &nbsp;
-            // See http://xsd.stylusstudio.com/2001Nov/post08021.htm
-            if (!hasDtdAndXsd) {
-                processException(TYPE_ERROR, e);
-                return;
-            }
-
             Matcher m = ELEMENT_TYPE_PATTERN.matcher(e.getMessage());
             if (!m.find()) {
                 processException(TYPE_ERROR, e);
diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java b/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java
index 1868ec1a..942d003a 100644
--- a/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java
+++ b/doxia-core/src/test/java/org/apache/maven/doxia/util/XmlValidatorTest.java
@@ -19,6 +19,7 @@
 package org.apache.maven.doxia.util;
 
 import org.apache.commons.io.IOUtils;
+import 
org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
 import org.codehaus.plexus.testing.PlexusTest;
 import org.codehaus.plexus.util.xml.XmlStreamReader;
 import org.junit.jupiter.api.Test;
@@ -35,6 +36,8 @@ public class XmlValidatorTest {
         String xml = IOUtils.toString(new 
XmlStreamReader(this.getClass().getResourceAsStream("/test.xml")));
 
         XmlValidator validator = new XmlValidator();
+        validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
+        validator.setEntityResolver(new CachedFileEntityResolver());
 
         validator.validate(xml);
     }
diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java b/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java
index 7fac8ce4..4da6182b 100644
--- a/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java
+++ b/doxia-core/src/test/java/org/apache/maven/doxia/xsd/AbstractXmlValidator.java
@@ -18,6 +18,8 @@
  */
 package org.apache.maven.doxia.xsd;
 
+import javax.xml.parsers.ParserConfigurationException;
+
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
@@ -25,6 +27,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.maven.doxia.util.XmlValidator;
 import org.codehaus.plexus.testing.PlexusTest;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
@@ -38,7 +41,6 @@ import org.xml.sax.SAXNotSupportedException;
 import org.xml.sax.SAXParseException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
-import org.xml.sax.helpers.XMLReaderFactory;
 
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.fail;
@@ -157,17 +159,19 @@ public abstract class AbstractXmlValidator {
     private XMLReader getXMLReader() {
         if (xmlReader == null) {
             try {
-                xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
-                xmlReader.setFeature("http://xml.org/sax/features/validation", validate);
-                xmlReader.setFeature("http://apache.org/xml/features/validation/schema", validate);
-                xmlReader.setErrorHandler(new MessagesErrorHandler());
-                xmlReader.setEntityResolver(getEntityResolver());
+                XmlValidator validator = new XmlValidator();
+                validator.setValidate(validate);
+                validator.setDefaultHandler(new MessagesErrorHandler());
+                validator.setEntityResolver(getEntityResolver());
+                xmlReader = validator.getXmlReader();
             } catch (SAXNotRecognizedException e) {
                 fail("SAXNotRecognizedException: " + e.getMessage());
             } catch (SAXNotSupportedException e) {
                 fail("SAXNotSupportedException: " + e.getMessage());
             } catch (SAXException e) {
                 fail("SAXException: " + e.getMessage());
+            } catch (ParserConfigurationException e) {
+                fail("ParserConfigurationException: " + e.getMessage());
             }
         }
 
@@ -185,10 +189,11 @@ public abstract class AbstractXmlValidator {
     private List<ErrorMessage> parseXML(String content) throws IOException, 
SAXException {
         String xmlContent = addNamespaces(content);
 
-        MessagesErrorHandler errorHandler =
-                (MessagesErrorHandler) getXMLReader().getErrorHandler();
+        XMLReader xmlReader = getXMLReader();
+
+        MessagesErrorHandler errorHandler = (MessagesErrorHandler) 
xmlReader.getErrorHandler();
 
-        getXMLReader().parse(new InputSource(new StringReader(xmlContent)));
+        xmlReader.parse(new InputSource(new StringReader(xmlContent)));
 
         return errorHandler.getMessages();
     }

Reply via email to