This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git

commit 17f8b44d50372f4b540059232ed0ffa189eceb62
Author: Gary Gregory <garydgreg...@gmail.com>
AuthorDate: Tue Jan 2 09:08:58 2024 -0500

    XmlStreamReader can't parse XML document with multi-line prolog #550
    
    - Apply PR #550, not merged or would have caused the build to fail.
    - Implement fix
---
 src/changes/changes.xml                                  |  1 +
 .../org/apache/commons/io/input/XmlStreamReader.java     | 16 +++++++++++-----
 .../org/apache/commons/io/input/XmlStreamReaderTest.java | 10 ++++++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b0670bf4..7508b585 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -88,6 +88,7 @@ The <action> type attribute can be add,update,fix,remove.
       <action dev="ggregory" type="fix" issue="IO-807" due-to="Elliotte Rusty Harold, Gary Gregory">Characterization test for broken symlinks when copying directories #547.</action>
       <action dev="ggregory" type="fix" due-to="Gary Gregory">ClosedInputStream.read(byte[], int, int) does not always return -1.</action>
       <action dev="ggregory" type="fix" due-to="Gary Gregory">ClosedOutputStream.write(byte[], int, int) does not always throw IOException.</action>
+      <action dev="ggregory" type="fix" due-to="Sylwester Lachiewicz, Gary Gregory">XmlStreamReader can't parse XML document with multi-line prolog #550.</action>
       <!-- Add -->
       <action dev="ggregory" type="add"                due-to="Gary Gregory">Add and use PathUtils.getFileName(Path, Function&lt;Path, R&gt;).</action>
       <action dev="ggregory" type="add"                due-to="Gary Gregory">Add and use PathUtils.getFileNameString().</action>
diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index 2b9b379d..ff16987f 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -214,6 +214,16 @@ public class XmlStreamReader extends Reader {
      * <p>
      * See also the <a href="https://www.w3.org/TR/2008/REC-xml-20081126/#NT-EncName">XML specification</a>.
      * </p>
+     * <p>
+     * Note the documented pattern is:
+     * </p>
+     * <pre>
+     * EncName   ::=   [A-Za-z] ([A-Za-z0-9._] | '-')*
+     * </pre>
+     * <p>
+     * However this does not match all the aliases that are supported by Java.
+     * For example, '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'.
+     * </p>
      */
     public static final Pattern ENCODING_PATTERN = Pattern.compile(
     // @formatter:off
@@ -223,10 +233,6 @@ public class XmlStreamReader extends Reader {
             + "((?:\"[A-Za-z0-9][A-Za-z0-9._+:-]*\")"  // double-quoted
             +  "|(?:'[A-Za-z0-9][A-Za-z0-9._+:-]*'))", // single-quoted
             Pattern.MULTILINE);
-    // N.B. the documented pattern is
-    // EncName   ::=   [A-Za-z] ([A-Za-z0-9._] | '-')*
-    // However this does not match all the aliases that are supported by Java.
-    // e.g.  '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'
     // @formatter:on
 
     private static final String RAW_EX_1 = "Illegal encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";
@@ -325,7 +331,7 @@ public class XmlStreamReader extends Reader {
                 inputStream.reset();
                 final BufferedReader bReader = new BufferedReader(new StringReader(xmlProlog.substring(0, firstGT + 1)));
                 final StringBuilder prolog = new StringBuilder();
-                IOConsumer.forEach(bReader.lines(), prolog::append);
+                IOConsumer.forEach(bReader.lines(), l -> prolog.append(l).append(' '));
                 final Matcher m = ENCODING_PATTERN.matcher(prolog);
                 if (m.find()) {
                     encoding = m.group(1).toUpperCase(Locale.ROOT);
diff --git a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
index 63d587a8..de986c98 100644
--- a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
+++ b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
@@ -60,6 +60,8 @@ public class XmlStreamReaderTest {
     private static final String UTF_32LE = "UTF-32LE";
     private static final String UTF_32BE = "UTF-32BE";
     private static final String UTF_8 = StandardCharsets.UTF_8.name();
+
+    private static final String XML6 = "xml-prolog-encoding-new-line";
     private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
     private static final String XML4 = "xml-prolog-encoding-single-quotes";
     private static final String XML3 = "xml-prolog-encoding-double-quotes";
@@ -102,6 +104,8 @@ public class XmlStreamReaderTest {
 
     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
+    private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_NEW_LINES = new MessageFormat(
+            "<?xml\nversion\n=\n\"1.0\"\nencoding\n=\n\"{1}\"\n?>\n<root>{2}</root>");
 
     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
@@ -123,6 +127,7 @@ public class XmlStreamReaderTest {
         XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
         XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
         XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
+        XMLs.put(XML6, XML_WITH_PROLOG_AND_ENCODING_NEW_LINES);
     }
 
     /**
@@ -624,5 +629,10 @@ public class XmlStreamReaderTest {
         xmlReader = new XmlStreamReader(is);
         assertEquals(xmlReader.getEncoding(), encoding);
         xmlReader.close();
+
+        is = getXmlInputStream("no-bom", XML6, encoding, encoding);
+        xmlReader = new XmlStreamReader(is);
+        assertEquals(xmlReader.getEncoding(), encoding);
+        xmlReader.close();
     }
 }

Reply via email to