This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git


The following commit(s) were added to refs/heads/master by this push:
     new 841b5faf IO-815: XmlStreamReader encoding RE is too strict
841b5faf is described below

commit 841b5fafe13683389d078ec7a7adf736c05e038e
Author: Sebb <s...@apache.org>
AuthorDate: Tue Oct 3 22:10:45 2023 +0100

    IO-815: XmlStreamReader encoding RE is too strict
---
 src/changes/changes.xml                                        | 3 +++
 src/main/java/org/apache/commons/io/input/XmlStreamReader.java | 8 +++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b42898b0..49264bf1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -48,6 +48,9 @@ The <action> type attribute can be add,update,fix,remove.
 
   <body>
     <release version="2.14.1" date="202Y-MM-DD" description="Java 8 is required.">
+      <action dev="sebb" type="fix" issue="IO-815" due-to="Laurence Gonsalves">
+        XmlStreamReader encoding match RE is too strict
+      </action>
       <action dev="ggregory" type="fix" issue="IO-810" due-to="Gregor Dschung, Gary Gregory">
         Javadoc in FileUtils does not reflect code for thrown exceptions.
       </action>
diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index a38ef807..f50fc76e 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -209,8 +209,14 @@ public class XmlStreamReader extends Reader {
     // @formatter:off
             "^<\\?xml\\s+"
             + "version\\s*=\\s*(?:(?:\"1\\.[0-9]+\")|(?:'1.[0-9]+'))\\s+"
-            + "encoding\\s*=\\s*((?:\"[A-Za-z]([A-Za-z0-9\\._]|-)*\")|(?:'[A-Za-z]([A-Za-z0-9\\\\._]|-)*'))",
+            + "encoding\\s*=\\s*"
+            + "((?:\"[A-Za-z0-9][A-Za-z0-9._+:-]*\")"  // double-quoted
+            +  "|(?:'[A-Za-z0-9][A-Za-z0-9._+:-]*'))", // single-quoted
             Pattern.MULTILINE);
+    // N.B. the documented pattern is
+    // EncName   ::=   [A-Za-z] ([A-Za-z0-9._] | '-')*
+    // However this does not match all the aliases that are supported by Java.
+    // e.g.  '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'
     // @formatter:on
 
     private static final String RAW_EX_1 = "Illegal encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";

Reply via email to