This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new d8bc0c199 Add support for infoset dataType member
d8bc0c199 is described below
commit d8bc0c199d96ffd3ca9f72a63bb899aedb8fb8b8
Author: Steve Lawrence <[email protected]>
AuthorDate: Wed Apr 8 10:38:54 2026 -0400
Add support for infoset dataType member
- Add new setIncludeDataType and getIncludeDataType functions to the
InfosetOutputter API to configure if InfosetOutputters should include
the dataType member when creating infosets. It is left up to specific
InfosetOutputter implementations if and how to represent this member in
their infosets
- The XMLTextInfosetOutputter and ScalaXMLInfosetOutputter are updated
to include the xsi:type attribute when this is enabled. They also
define the "xsi" namespace prefix if not already defined. Other
InfosetOutputters do not currently implement this.
- Update the TDML Runner to enable this flag for all infoset outputters.
The TDML Runner already supports type-aware comparisons when xsi:type
is provided, so enabling this allows all TDML tests to use type-aware
logic. This means expected infosets no longer need to include xsi:type
for type-aware comparisons.
- Add new "infosetIncludeDataType" tunable, which calls setIncludeDataType if
set. This allows enabling this feature without needing to modify code
to call the new API function
- Fix TDML tests that used an xsi:type value that did not match
the element's actual type
- Discovered a bug where xs:decimal elements could be output with
scientific notation. This is not legal in XSD so these elements are
now output using .toPlainString
Deprecation/Compatibility
- Previous versions of Daffodil could sometimes use scientific notation
when outputting elements with an xs:decimal type, which XSD does not
allow. Daffodil now always outputs xs:decimal types using standard
decimal notation without any exponent part.
DAFFODIL-182
---
.../daffodil/api/infoset/InfosetOutputter.java | 27 +++++++++++++++
.../org/apache/daffodil/lib/xml/XMLUtils.scala | 38 +++++++++++++++-------
.../daffodil/runtime1/infoset/InfosetImpl.scala | 4 +++
.../infoset/ScalaXMLInfosetOutputter.scala | 25 ++++++++++++--
.../runtime1/infoset/XMLTextInfosetOutputter.scala | 15 +++++++++
.../runtime1/processors/DataProcessor.scala | 5 +++
.../resources/org/apache/daffodil/xsd/dafext.xsd | 8 +++++
.../processor/tdml/TDMLInfosetOutputter.scala | 5 +++
.../daffodil/processor/tdml/TestTDMLRunner.scala | 24 ++++----------
.../section05/simple_types/SimpleTypes.tdml | 6 ++--
10 files changed, 121 insertions(+), 36 deletions(-)
diff --git
a/daffodil-core/src/main/java/org/apache/daffodil/api/infoset/InfosetOutputter.java
b/daffodil-core/src/main/java/org/apache/daffodil/api/infoset/InfosetOutputter.java
index e2cca491e..10f142c10 100644
---
a/daffodil-core/src/main/java/org/apache/daffodil/api/infoset/InfosetOutputter.java
+++
b/daffodil-core/src/main/java/org/apache/daffodil/api/infoset/InfosetOutputter.java
@@ -47,19 +47,28 @@ public abstract class InfosetOutputter {
* blob directory path which defaults to java temp dir
*/
private Path blobDirectory = Paths.get(System.getProperty("java.io.tmpdir"));
+
/**
* blob prefix which defaults to daffodil-
*/
private String blobPrefix = "daffodil-";
+
/**
* blob suffix which defaults to .blob
*/
private String blobSuffix = ".blob";
+
/**
* list of blob paths output in the infoset
*/
private List<Path> blobPaths;
+ /**
+ * whether or not to enable the dataType infoset member. It is up to
+ * InfosetOutputter implementations if and how to include member.
+ */
+ private boolean includeDataType = false;
+
/**
* Reset the internal state of this InfosetOutputter. This should be called
* in between calls to the parse method.
@@ -205,4 +214,22 @@ public abstract class InfosetOutputter {
final public void setBlobPaths(List<Path> blobPaths) {
this.blobPaths = blobPaths;
}
+
+ /**
+ * Set whether the InfosetOutputter should include the dataType member when
+ * it outputs infoset elements. It is up to InfosetOutputter implementations
+ * if and how to output the dataType
+ */
+ final public void setIncludeDataType(boolean includeDataType) {
+ this.includeDataType = includeDataType;
+ }
+
+ /**
+ * Get whether the InfosetOutputter should include the dataType member when
+ * it outputs infoset elements. It is up to InfosetOutputter implementations
+ * if and how to output this dataType
+ */
+ final public boolean getIncludeDataType() {
+ return includeDataType;
+ }
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index 2de76abe5..da67ac01b 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -912,6 +912,18 @@ Differences were (path, expected, actual):
res
}
+ // Normalize xmlns mappings to improve comparisons. Avoids differences in
whitespace,
+ // namespace ordering, and bindings like xmlns:xsi which is optionally added
by some infoset
+ // outputters that have support for xsi:type
+ private def normalizeMappings(mappings: String): String = {
+ mappings
+ .trim()
+ .split("\\s+")
+ .filterNot(_.startsWith("xmlns:xsi="))
+ .sorted
+ .mkString(" ")
+ }
+
def computeDiffOne(
an: Node,
bn: Node,
@@ -939,13 +951,21 @@ Differences were (path, expected, actual):
(prefixB, labelB, attribsB, nsbB, childrenB)
case x => Assert.invariantFailed(s"Expected elem, found $x")
}
- val typeA: Option[String] = getXSIType(a)
- val typeB: Option[String] = getXSIType(b)
+ // some TDML files use xsd prefixes for xsi:type values. Instead of
trying to resolve
+ // the prefix, just replace "xsd:" with "xs:"--the rest of our code
assumes xs prefixes
+ val typeA: Option[String] = getXSIType(a).map { t =>
+ if (t.startsWith("xsd:")) "xs:" + t.substring(4) else t
+ }
+ val typeB: Option[String] = getXSIType(b).map { t =>
+ if (t.startsWith("xsd:")) "xs:" + t.substring(4) else t
+ }
val maybeType: Option[String] =
Option(typeA.getOrElse(typeB.getOrElse(null)))
val nilledA = a.attribute(XSI_NAMESPACE.toString, "nil")
val nilledB = b.attribute(XSI_NAMESPACE.toString, "nil")
- val mappingsA = if (checkNamespaces)
nsbA.buildString(aParentScope).trim else ""
- val mappingsB = if (checkNamespaces)
nsbB.buildString(bParentScope).trim else ""
+ val mappingsA =
+ if (checkNamespaces)
normalizeMappings(nsbA.buildString(aParentScope)) else ""
+ val mappingsB =
+ if (checkNamespaces)
normalizeMappings(nsbB.buildString(bParentScope)) else ""
if (labelA != labelB) {
// different label
@@ -965,15 +985,9 @@ Differences were (path, expected, actual):
nilledB.map(_.toString).getOrElse("")
)
)
- } else if (typeA != typeB && typeA.isDefined && typeB.isDefined) {
+ } else if (typeA.isDefined && typeB.isDefined && typeA.get !=
typeB.get) {
// different xsi:type (if both suppplied)
- List(
- (
- zPath + "/" + labelA + "@xsi:type",
- typeA.map(_.toString).getOrElse(""),
- typeA.map(_.toString).getOrElse("")
- )
- )
+ List((zPath + "/" + labelA + "@xsi:type", typeA.get, typeB.get))
} else {
val pathLabel = labelA + maybeIndex.map("[" + _ + "]").getOrElse("")
val thisPathStep = pathLabel +: parentPathSteps
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala
index 311057bc3..8b1741f11 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala
@@ -1543,6 +1543,10 @@ sealed class DISimple(override val erd:
ElementRuntimeData)
else if (f == Float.NegativeInfinity) XMLUtils.NegativeInfinityString
else f.toString
}
+ case d: java.math.BigDecimal => {
+ // scientific notation is not allowed by XSD for xs:decimal
+ d.toPlainString
+ }
case x => x.toString
}
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
index e30534fcc..4abe32f37 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala
@@ -19,7 +19,9 @@ package org.apache.daffodil.runtime1.infoset
import scala.collection.mutable.ListBuffer
import scala.xml.MetaData
+import scala.xml.NamespaceBinding
import scala.xml.Null
+import scala.xml.PrefixedAttribute
import scala.xml.UnprefixedAttribute
import org.apache.daffodil.api.DFDLPrimType
@@ -54,6 +56,16 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean =
false)
resultNode = Maybe(root(0))
}
+ private def getScope(diElem: DIElement): NamespaceBinding = {
+ val minScope = diElem.metadata.minimizedScope
+ // if including xsi:type is enabled, ensure the xsi namespace is defined
on the root element
+ if (getIncludeDataType() && stack.length == 1 && minScope.getURI("xsi") ==
null) {
+ NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope)
+ } else {
+ minScope
+ }
+ }
+
private def getAttributes(diElem: DIElement): MetaData = {
val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null
val freedAttr =
@@ -80,7 +92,14 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean =
false)
} else {
nilAttr
}
- freedAttr
+ val typedAttr =
+ if (getIncludeDataType() && diElem.isSimple) {
+ val primName = diElem.erd.optPrimType.get.name
+ new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr)
+ } else {
+ freedAttr
+ }
+ typedAttr
}
override def startSimple(se: InfosetSimpleElement): Unit = {
@@ -105,7 +124,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean =
false)
diSimple.metadata.prefix,
diSimple.metadata.name,
attributes,
- diSimple.metadata.minimizedScope,
+ getScope(diSimple),
minimizeEmpty = true,
children*
)
@@ -130,7 +149,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean =
false)
diComplex.metadata.prefix,
diComplex.metadata.name,
attributes,
- diComplex.metadata.minimizedScope,
+ getScope(diComplex),
minimizeEmpty = true,
children*
)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
index d3cbd762e..642316b71 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala
@@ -85,9 +85,12 @@ class XMLTextInfosetOutputter private (
*/
private var inScopeComplexElementHasChildren = false
+ private var hasStartedRoot = false
+
override def reset(): Unit = {
resetIndentation()
inScopeComplexElementHasChildren = false
+ hasStartedRoot = false
}
private def outputTagName(elem: DIElement): Unit = {
@@ -112,6 +115,16 @@ class XMLTextInfosetOutputter private (
nsbStart.buildString(sb, nsbEnd)
writer.write(sb.toString)
}
+ // if including xsi:type is enabled, ensure the xsi namespace is defined
on the root
+ // element
+ if (getIncludeDataType() && !hasStartedRoot && nsbStart.getURI("xsi") ==
null) {
+ writer.write(" xmlns:xsi=\"" + XMLUtils.XSI_NAMESPACE + "\"")
+ }
+ }
+
+ if (getIncludeDataType() && elem.isSimple) {
+ val primName = elem.erd.optPrimType.get.name
+ writer.write(" xsi:type=\"xs:" + primName + "\"")
}
if (elem.isNilled) {
@@ -227,6 +240,7 @@ class XMLTextInfosetOutputter private (
outputEndTag(simple)
inScopeComplexElementHasChildren = true
+ hasStartedRoot = true
}
override def endSimple(simple: InfosetSimpleElement): Unit = {
@@ -242,6 +256,7 @@ class XMLTextInfosetOutputter private (
outputStartTag(complex)
incrementIndentation()
inScopeComplexElementHasChildren = false
+ hasStartedRoot = true
}
override def endComplex(ce: InfosetComplexElement): Unit = {
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
index cf88c42a4..3b16d3680 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
@@ -287,6 +287,11 @@ class DataProcessor(
output: api.infoset.InfosetOutputter
): DFDL.ParseResult = {
checkNotError()
+
+ if (tunables.infosetIncludeDataType) {
+ output.setIncludeDataType(true)
+ }
+
// If full validation is enabled, tee all the infoset events to a second
// infoset outputter that writes the infoset to a byte array, and then
// we'll validate that byte array upon a successful parse.
diff --git
a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
index 8e0398c04..c34927801 100644
--- a/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
+++ b/daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd
@@ -273,6 +273,14 @@
</xs:restriction>
</xs:simpleType>
</xs:element>
+ <xs:element name="infosetIncludeDataType" type="xs:boolean"
default="false" minOccurs="0">
+ <xs:annotation>
+ <xs:documentation>
+ Whether or not to include the dataType infoset member when
outputting an infoset. If
+ and how this is represented in the infoset is up to
InfosetOutputter implementations.
+ </xs:documentation>
+ </xs:annotation>
+ </xs:element>
<xs:element name="inputFileMemoryMapLowThreshold" type="xs:int"
default="33554432" minOccurs="0">
<xs:annotation>
<xs:documentation>
diff --git
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
index 6adfcf70d..cf913d687 100644
---
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
+++
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
@@ -57,6 +57,7 @@ class TDMLInfosetOutputterScala(scalaOut:
ScalaXMLInfosetOutputter)
object TDMLInfosetOutputterScala {
def apply(): TDMLInfosetOutputterScala = {
val scalaOut = new ScalaXMLInfosetOutputter()
+ scalaOut.setIncludeDataType(true)
new TDMLInfosetOutputterScala(scalaOut)
}
}
@@ -99,6 +100,10 @@ object TDMLInfosetOutputterAll {
val jsonOut = new JsonInfosetOutputter(jsonStream, false)
val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
+ Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out =>
+ out.setIncludeDataType(true)
+ }
+
new TDMLInfosetOutputterAll(
jsonStream,
xmlStream,
diff --git
a/daffodil-tdml-processor/src/test/scala/org/apache/daffodil/processor/tdml/TestTDMLRunner.scala
b/daffodil-tdml-processor/src/test/scala/org/apache/daffodil/processor/tdml/TestTDMLRunner.scala
index 8cbfb0ebf..59f9af9ab 100644
---
a/daffodil-tdml-processor/src/test/scala/org/apache/daffodil/processor/tdml/TestTDMLRunner.scala
+++
b/daffodil-tdml-processor/src/test/scala/org/apache/daffodil/processor/tdml/TestTDMLRunner.scala
@@ -690,7 +690,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
runner.runOneTest("testTDMLHexBinaryTypeAwareSuccess")
}
- @Test def testTDMLHexBinaryTypeAwareFailure(): Unit = {
+ @Test def testTDMLHexBinaryTypeAwareSuccess_03(): Unit = {
val testSuite = <ts:testSuite xmlns:ts={tdml} suiteName="theSuiteName"
xmlns:xs={
xsd
} xmlns:dfdl={dfdl} xmlns:tns={example}>
@@ -699,7 +699,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
<dfdl:format ref="tns:GeneralFormat"/>
<xs:element name="data" type="xs:hexBinary"
dfdl:lengthKind="explicit" dfdl:length="4"/>
</ts:defineSchema>
- <ts:parserTestCase ID="some identifier"
name="testTDMLHexBinaryTypeAwareFailure"
+ <ts:parserTestCase ID="some identifier"
name="testTDMLHexBinaryTypeAwareSuccess"
root="data" model="mySchema">
<ts:document>
<ts:documentPart
type="byte">A1B2C3D4</ts:documentPart>
@@ -712,13 +712,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
</ts:parserTestCase>
</ts:testSuite>
val runner = new Runner(testSuite)
- val e = intercept[Exception] {
- runner.runOneTest("testTDMLHexBinaryTypeAwareFailure")
- }
- val msg = e.getMessage()
- assertTrue(msg.contains("Comparison failed"))
- assertTrue(msg.contains("a1b2c3d4"))
- assertTrue(msg.contains("A1B2C3D4"))
+ runner.runOneTest("testTDMLHexBinaryTypeAwareSuccess")
}
@Test def testTDMLDateTimeTypeAwareSuccess_01(): Unit = {
@@ -825,7 +819,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
runner.runOneTest("testTDMLDateTimeTypeAwareSuccess")
}
- @Test def testTDMLDateTimeTypeAwareFailure(): Unit = {
+ @Test def testTDMLDateTimeTypeAwareSuccess_05(): Unit = {
val testSuite = <ts:testSuite xmlns:ts={tdml} suiteName="theSuiteName"
xmlns:xs={
xsd
} xmlns:dfdl={dfdl} xmlns:tns={example}>
@@ -836,7 +830,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
dfdl:calendarPatternKind="explicit"
dfdl:calendarPattern="uuuu-MM-dd'T'HH:mm:ss.SSSSSSxxxxx" />
</ts:defineSchema>
- <ts:parserTestCase ID="some identifier"
name="testTDMLDateTimeTypeAwareFailure"
+ <ts:parserTestCase ID="some identifier"
name="testTDMLDateTimeTypeAwareSuccess"
root="data" model="mySchema">
<ts:document>1995-03-24T01:30:00.000000+00:00</ts:document>
<ts:infoset>
@@ -847,13 +841,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
</ts:parserTestCase>
</ts:testSuite>
val runner = new Runner(testSuite)
- val e = intercept[Exception] {
- runner.runOneTest("testTDMLDateTimeTypeAwareFailure")
- }
- val msg = e.getMessage()
- assertTrue(msg.contains("Comparison failed"))
- assertTrue(msg.contains("1995-03-24T01:30:00Z"))
- assertTrue(msg.contains("1995-03-24T01:30:00+00:00"))
+ runner.runOneTest("testTDMLDateTimeTypeAwareSuccess")
}
/**
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
index f9bf50c1a..61808e7ec 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
@@ -3345,7 +3345,7 @@
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
- <dec_03 xsi:type="xs:double">1.23456789E+13</dec_03>
+ <dec_03 xsi:type="xs:decimal">12345678900000</dec_03>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>
@@ -3357,7 +3357,7 @@
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
- <dec_04 xsi:type="xs:double">1E-200</dec_04>
+ <dec_04
xsi:type="xs:decimal">0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001</dec_04>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>
@@ -3369,7 +3369,7 @@
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
- <dec_05 xsi:type="xs:double">1E+200</dec_05>
+ <dec_05
xsi:type="xs:decimal">100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000</dec_05>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>