This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 81104ca86 Maintain empty/non-elements in stringAsXml output
81104ca86 is described below

commit 81104ca865f1d669a768ac05a1cd9212ec3dcc47
Author: Steve Lawrence <[email protected]>
AuthorDate: Wed Apr 8 10:37:21 2026 -0400

    Maintain empty/non-elements in stringAsXml output
    
    The stringAsXml implementation currently converts both `<foo/>` and
    `<foo></foo>` XML strings to `<foo/>`. Although stringAsXml does not
    guarantee the resulting XML will always be the same, we do try to
    keep the result as close to the original as is reasonable.
    
    By casting the existing XMLReader/Writer classes to XMLStreamReader2 and
    XMLStreamWriter2, we can use additional APIs to detect empty elements
    and write them as empty, with all other elements written with full
    endings.
    
    DAFFODIL-3074
---
 .../runtime1/infoset/XMLTextInfosetInputter.scala  |  32 ++++++++++++++-------
 .../runtime1/infoset/XMLTextInfosetOutputter.scala |  14 +++++----
 .../stringAsXml/namespaced/binMessage_01.dat       | Bin 800 -> 821 bytes
 .../stringAsXml/namespaced/binMessage_01.dat.xml   |   4 +--
 .../namespaced/binMessage_01.dat.xml.dat           | Bin 747 -> 776 bytes
 5 files changed, 33 insertions(+), 17 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
index 5d0edd0b8..7f8b50e1f 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
@@ -24,8 +24,6 @@ import javax.xml.XMLConstants
 import javax.xml.stream.XMLInputFactory
 import javax.xml.stream.XMLStreamConstants.*
 import javax.xml.stream.XMLStreamException
-import javax.xml.stream.XMLStreamReader
-import javax.xml.stream.XMLStreamWriter
 
 import org.apache.daffodil.api
 import org.apache.daffodil.api.Daffodil.InfosetInputterEventType
@@ -34,7 +32,9 @@ import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.xml.XMLUtils
 import org.apache.daffodil.runtime1.dpath.NodeInfo
 
-import com.ctc.wstx.cfg.ErrorConsts;
+import com.ctc.wstx.cfg.ErrorConsts
+import org.codehaus.stax2.XMLStreamReader2
+import org.codehaus.stax2.XMLStreamWriter2
 
 object XMLTextInfoset {
   lazy val xmlInputFactory = {
@@ -140,10 +140,14 @@ object XMLTextInfoset {
    *
    * Both a lone CR and CRLF are converted to LF.
    */
-  def writeXMLStreamEvent(xsr: XMLStreamReader, xsw: XMLStreamWriter): Unit = {
+  def writeXMLStreamEvent(xsr: XMLStreamReader2, xsw: XMLStreamWriter2): Unit 
= {
     xsr.getEventType() match {
       case START_ELEMENT => {
-        xsw.writeStartElement(xsr.getPrefix(), xsr.getLocalName(), 
xsr.getNamespaceURI())
+        if (xsr.isEmptyElement()) {
+          xsw.writeEmptyElement(xsr.getPrefix(), xsr.getLocalName(), 
xsr.getNamespaceURI())
+        } else {
+          xsw.writeStartElement(xsr.getPrefix(), xsr.getLocalName(), 
xsr.getNamespaceURI())
+        }
         for (i <- 0 until xsr.getNamespaceCount()) {
           xsw.writeNamespace(xsr.getNamespacePrefix(i), xsr.getNamespaceURI(i))
         }
@@ -155,8 +159,13 @@ object XMLTextInfoset {
             xsr.getAttributeValue(i)
           )
         }
+        if (xsr.isEmptyElement()) {
+          // skip the next END_ELEMENT event since writeEmptyElement above 
causes the
+          // XMLStreamWriter to handle closing the empty element
+          xsr.next()
+        }
       }
-      case END_ELEMENT => xsw.writeEndElement()
+      case END_ELEMENT => xsw.writeFullEndElement()
       case CHARACTERS => xsw.writeCharacters(xsr.getText())
       case COMMENT => xsw.writeComment(xsr.getText())
       case CDATA => xsw.writeCData(xsr.getText())
@@ -189,8 +198,10 @@ object XMLTextInfoset {
 
 class XMLTextInfosetInputter(input: java.io.InputStream) extends 
api.infoset.InfosetInputter {
 
-  private lazy val xsr: XMLStreamReader = {
-    val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(input)
+  private lazy val xsr: XMLStreamReader2 = {
+    val xsr = XMLTextInfoset.xmlInputFactory
+      .createXMLStreamReader(input)
+      .asInstanceOf[XMLStreamReader2]
 
     // no need for UnparseError here. If the XML syntax is bad, parser catches 
it before we get here.
     Assert.invariant(xsr.hasNext())
@@ -256,8 +267,9 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
     // wrapper tag. We trim the result to remove whitespace that the outputter
     // may have written with pretty mode enabled.
     val sw = new StringWriter()
-    val xsw =
-      XMLTextInfoset.xmlOutputFactory.createXMLStreamWriter(sw, 
StandardCharsets.UTF_8.toString)
+    val xsw = XMLTextInfoset.xmlOutputFactory
+      .createXMLStreamWriter(sw, StandardCharsets.UTF_8.toString)
+      .asInstanceOf[XMLStreamWriter2]
     xsw.writeStartDocument()
     while (
       xsr.getEventType() != END_ELEMENT || xsr.getLocalName() != 
XMLTextInfoset.stringAsXml
diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
index ba9f25602..d3cbd762e 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
@@ -31,6 +31,9 @@ import org.apache.daffodil.lib.util.Indentable
 import org.apache.daffodil.lib.xml.XMLUtils
 import org.apache.daffodil.runtime1.dpath.NodeInfo
 
+import org.codehaus.stax2.XMLStreamReader2
+import org.codehaus.stax2.XMLStreamWriter2
+
 /**
  * Writes the infoset to a java.io.BufferedWriter as XML text.
  *
@@ -148,11 +151,12 @@ class XMLTextInfosetOutputter private (
     // logic also skips the START_DOCUMENT event so that the XML declaration is
     // not written in the middle of our XML infoset
     val sr = new StringReader(str)
-    val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(sr)
-    val xsw = XMLTextInfoset.xmlOutputFactory.createXMLStreamWriter(
-      writer,
-      StandardCharsets.UTF_8.toString
-    )
+    val xsr = XMLTextInfoset.xmlInputFactory
+      .createXMLStreamReader(sr)
+      .asInstanceOf[XMLStreamReader2]
+    val xsw = XMLTextInfoset.xmlOutputFactory
+      .createXMLStreamWriter(writer, StandardCharsets.UTF_8.toString)
+      .asInstanceOf[XMLStreamWriter2]
     Assert.invariant(xsr.getEventType() == START_DOCUMENT)
     while (xsr.hasNext()) {
       xsr.next()
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat
 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat
index 3e9a454e7..bdb4f0b65 100644
Binary files 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat
 and 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat
 differ
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
index caa9af9d1..c54b830fc 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml
@@ -9,8 +9,8 @@
   <?processing instruction?>
   <field><![CDATA[Field ]]> with <![CDATA[cdata]]> </field> here is mixed 
content
   <field>spaces</field>    <field>   spaces   </field> and more mixed content
-  <field/> and more mixed content
-  <field/>
+  <field attr="foo"/> and more mixed content
+  <field attr="bar"></field>
   <field>entity references: &lt; > &amp; " ' ©</field>
   <field>CR</field>&#xd;<field>LF</field>
 <field>CRLF</field>&#xd;
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat
 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat
index 523a7d539..92703251f 100644
Binary files 
a/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat
 and 
b/daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml.dat
 differ

Reply via email to