This is an automated email from the ASF dual-hosted git repository. paulk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/groovy.git
commit 38fc020f6cd6254094d0576a39e94864cf1f99e4 Author: Paul King <[email protected]> AuthorDate: Mon Apr 13 14:04:43 2026 +1000 GROOVY-7633: Add constructor with Map for SAXOptions to XmlSlurper (align XmlParser behavior) --- .../src/main/java/groovy/xml/XmlParser.java | 119 +++++++++++++++++---- .../groovy-xml/src/spec/doc/xml-userguide.adoc | 48 +++++++++ .../src/spec/test/UserGuideXmlParserTest.groovy | 8 ++ .../src/spec/test/UserGuideXmlSlurperTest.groovy | 8 ++ .../test/groovy/groovy/xml/XmlParserTest.groovy | 26 +++++ 5 files changed, 189 insertions(+), 20 deletions(-) diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/XmlParser.java b/subprojects/groovy-xml/src/main/java/groovy/xml/XmlParser.java index b1afa20275..6937addc59 100644 --- a/subprojects/groovy-xml/src/main/java/groovy/xml/XmlParser.java +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/XmlParser.java @@ -75,21 +75,28 @@ public class XmlParser implements ContentHandler { private StringBuilder bodyText = new StringBuilder(); private final List<Node> stack = new ArrayList<Node>(); private Locator locator; - private final XMLReader reader; + private XMLReader reader; private Node parent; private boolean trimWhitespace = false; private boolean keepIgnorableWhitespace = false; - private boolean namespaceAware; + private boolean namespaceAware = true; + private boolean validating = false; + private boolean allowDocTypeDeclaration = false; /** * Creates a non-validating and namespace-aware <code>XmlParser</code> which does not allow DOCTYPE declarations in documents. + * <p> + * Parser options can be configured via setters before the first parse call: + * <pre> + * // Using Groovy named parameters: + * def parser = new XmlParser(namespaceAware: false, trimWhitespace: true) + * </pre> * * @throws ParserConfigurationException if no parser which satisfies the requested configuration can be created. * @throws SAXException for SAX errors. */ public XmlParser() throws ParserConfigurationException, SAXException { - this(false, true); } /** @@ -114,21 +121,43 @@ public class XmlParser implements ContentHandler { * @throws SAXException for SAX errors. */ public XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration) throws ParserConfigurationException, SAXException { + this.validating = validating; + this.namespaceAware = namespaceAware; + this.allowDocTypeDeclaration = allowDocTypeDeclaration; + } + + public XmlParser(XMLReader reader) { + this.reader = reader; + } + + public XmlParser(SAXParser parser) throws SAXException { + reader = parser.getXMLReader(); + } + + private void initReader() throws ParserConfigurationException, SAXException { SAXParserFactory factory = FactorySupport.createSaxParserFactory(); factory.setNamespaceAware(namespaceAware); - this.namespaceAware = namespaceAware; factory.setValidating(validating); setFeatureQuietly(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true); setFeatureQuietly(factory, "http://apache.org/xml/features/disallow-doctype-decl", !allowDocTypeDeclaration); reader = factory.newSAXParser().getXMLReader(); } - public XmlParser(XMLReader reader) { - this.reader = reader; + private XMLReader ensureReader() { + try { + if (reader == null) { + initReader(); + } + return reader; + } catch (ParserConfigurationException | SAXException e) { + throw new RuntimeException("Failed to initialize XML reader", e); + } } - public XmlParser(SAXParser parser) throws SAXException { - reader = parser.getXMLReader(); + private void checkNotInitialized(String property) { + if (reader != null) { + throw new IllegalStateException(property + " must be set before parsing"); + } } /** @@ -304,13 +333,62 @@ public class XmlParser implements ContentHandler { /** * Enable and/or disable namespace handling. + * Must be set before the first parse call. * * @param namespaceAware the new desired value + * @throws IllegalStateException if called after parsing has started */ public void setNamespaceAware(boolean namespaceAware) { + checkNotInitialized("namespaceAware"); this.namespaceAware = namespaceAware; } + /** + * Determine if the parser validates documents. + * + * @return true if validation is enabled + * @since 6.0.0 + */ + public boolean isValidating() { + return validating; + } + + /** + * Enable and/or disable validation. + * Must be set before the first parse call. + * + * @param validating the new desired value + * @throws IllegalStateException if called after parsing has started + * @since 6.0.0 + */ + public void setValidating(boolean validating) { + checkNotInitialized("validating"); + this.validating = validating; + } + + /** + * Determine if DOCTYPE declarations are allowed. + * + * @return true if DOCTYPE declarations are allowed + * @since 6.0.0 + */ + public boolean isAllowDocTypeDeclaration() { + return allowDocTypeDeclaration; + } + + /** + * Enable and/or disable DOCTYPE declaration support. + * Must be set before the first parse call. + * + * @param allowDocTypeDeclaration the new desired value + * @throws IllegalStateException if called after parsing has started + * @since 6.0.0 + */ + public void setAllowDocTypeDeclaration(boolean allowDocTypeDeclaration) { + checkNotInitialized("allowDocTypeDeclaration"); + this.allowDocTypeDeclaration = allowDocTypeDeclaration; + } + // Delegated XMLReader methods //------------------------------------------------------------------------ @@ -318,70 +396,70 @@ public class XmlParser implements ContentHandler { * @see org.xml.sax.XMLReader#getDTDHandler() */ public DTDHandler getDTDHandler() { - return this.reader.getDTDHandler(); + return ensureReader().getDTDHandler(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getEntityResolver() */ public EntityResolver getEntityResolver() { - return this.reader.getEntityResolver(); + return ensureReader().getEntityResolver(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getErrorHandler() */ public ErrorHandler getErrorHandler() { - return this.reader.getErrorHandler(); + return ensureReader().getErrorHandler(); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getFeature(java.lang.String) */ public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { - return this.reader.getFeature(uri); + return ensureReader().getFeature(uri); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#getProperty(java.lang.String) */ public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { - return this.reader.getProperty(uri); + return ensureReader().getProperty(uri); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) */ public void setDTDHandler(final DTDHandler dtdHandler) { - this.reader.setDTDHandler(dtdHandler); + ensureReader().setDTDHandler(dtdHandler); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) */ public void setEntityResolver(final EntityResolver entityResolver) { - this.reader.setEntityResolver(entityResolver); + ensureReader().setEntityResolver(entityResolver); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) */ public void setErrorHandler(final ErrorHandler errorHandler) { - this.reader.setErrorHandler(errorHandler); + ensureReader().setErrorHandler(errorHandler); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) */ public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException { - this.reader.setFeature(uri, value); + ensureReader().setFeature(uri, value); } /* (non-Javadoc) * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object) */ public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException { - reader.setProperty(uri, value); + ensureReader().setProperty(uri, value); } // ContentHandler interface @@ -464,8 +542,9 @@ public class XmlParser implements ContentHandler { // Implementation methods //------------------------------------------------------------------------- protected XMLReader getXMLReader() { - reader.setContentHandler(this); - return reader; + XMLReader r = ensureReader(); + r.setContentHandler(this); + return r; } protected void addTextToNode() { diff --git a/subprojects/groovy-xml/src/spec/doc/xml-userguide.adoc b/subprojects/groovy-xml/src/spec/doc/xml-userguide.adoc index 056567b3d1..ace9bc78ee 100644 --- a/subprojects/groovy-xml/src/spec/doc/xml-userguide.adoc +++ b/subprojects/groovy-xml/src/spec/doc/xml-userguide.adoc @@ -95,6 +95,54 @@ GPath expressions with them are the same (both use `breadthFirst()` and `depthFirst()` expressions). So I guess it depends on the write/read frequency. +=== Parser configuration with named parameters + +Both `XmlParser` and `XmlSlurper` support Groovy's named parameter syntax +for configuration, making it easy to set options without remembering +positional boolean argument order: + +[source,groovy] +.XmlSlurper +---- +include::../test/UserGuideXmlSlurperTest.groovy[tags=testNamedParameters,indent=0] +---- + +[source,groovy] +.XmlParser +---- +include::../test/UserGuideXmlParserTest.groovy[tags=testNamedParameters,indent=0] +---- + +Available options for both parsers: + +[cols="1,3,1",options="header"] +|=== +| Option | Description | Default + +| `namespaceAware` +| Enable XML namespace handling +| `true` + +| `validating` +| Validate documents against a DTD +| `false` + +| `allowDocTypeDeclaration` +| Allow DOCTYPE declarations in documents +| `false` + +| `keepIgnorableWhitespace` +| Preserve whitespace before elements +| `false` +|=== + +`XmlParser` additionally supports `trimWhitespace` (default `false`) +to trim whitespace from element text content. + +NOTE: Options that affect the underlying SAX parser (`namespaceAware`, +`validating`, `allowDocTypeDeclaration`) must be set before the first +parse call. Setting them afterwards throws an `IllegalStateException`. + === DOMCategory There is another way of parsing XML documents with Groovy with the diff --git a/subprojects/groovy-xml/src/spec/test/UserGuideXmlParserTest.groovy b/subprojects/groovy-xml/src/spec/test/UserGuideXmlParserTest.groovy index be06ac1fbd..5626f8d508 100644 --- a/subprojects/groovy-xml/src/spec/test/UserGuideXmlParserTest.groovy +++ b/subprojects/groovy-xml/src/spec/test/UserGuideXmlParserTest.groovy @@ -121,4 +121,12 @@ class UserGuideXmlParserTest extends GroovyTestCase { // end::testSettingAttributes1[] } + void testNamedParameters() { + // tag::testNamedParameters[] + def parser = new XmlParser(namespaceAware: false, trimWhitespace: true) + def result = parser.parseText('<root><item>value</item></root>') + assert result.item[0].text() == 'value' + // end::testNamedParameters[] + } + } diff --git a/subprojects/groovy-xml/src/spec/test/UserGuideXmlSlurperTest.groovy b/subprojects/groovy-xml/src/spec/test/UserGuideXmlSlurperTest.groovy index e91aad8a63..5e473b2c25 100644 --- a/subprojects/groovy-xml/src/spec/test/UserGuideXmlSlurperTest.groovy +++ b/subprojects/groovy-xml/src/spec/test/UserGuideXmlSlurperTest.groovy @@ -189,4 +189,12 @@ class UserGuideXmlSlurperTest extends GroovyTestCase { // end::testSettingAttributes1[] } + void testNamedParameters() { + // tag::testNamedParameters[] + def slurper = new XmlSlurper(namespaceAware: false, keepIgnorableWhitespace: true) + def result = slurper.parseText('<root><item>value</item></root>') + assert result.item.text() == 'value' + // end::testNamedParameters[] + } + } diff --git a/subprojects/groovy-xml/src/test/groovy/groovy/xml/XmlParserTest.groovy b/subprojects/groovy-xml/src/test/groovy/groovy/xml/XmlParserTest.groovy index 52745fc8d5..dc32462cf7 100644 --- a/subprojects/groovy-xml/src/test/groovy/groovy/xml/XmlParserTest.groovy +++ b/subprojects/groovy-xml/src/test/groovy/groovy/xml/XmlParserTest.groovy @@ -343,4 +343,30 @@ p() { def result = writer.toString() assert result == '<root><foo bar="baz"><inner/></foo><foo bar="zab"><inner>text</inner></foo></root>' } + + @Test + void testNamedParameterConstruction() { + def xml = '<root><item name="test">value</item></root>' + + // named parameters via setters + def parser = new XmlParser(namespaceAware: false, trimWhitespace: true) + def result = parser.parseText(xml) + assert result.item[0].text() == 'value' + assert result.item[0].@name == 'test' + + // setter after boolean constructor, before first parse + def parser2 = new XmlParser(false, false) + parser2.allowDocTypeDeclaration = true + assert parser2.parseText(xml).item[0].text() == 'value' + + // setter after parse should fail + def parser3 = new XmlParser() + parser3.parseText(xml) + try { + parser3.namespaceAware = false + assert false, 'should have thrown' + } catch (IllegalStateException e) { + assert e.message.contains('must be set before parsing') + } + } }
