This is an automated email from the ASF dual-hosted git repository. sebb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-io.git
The following commit(s) were added to refs/heads/master by this push:
     new 841b5faf IO-815: XmlStreamReader encoding RE is too strict
841b5faf is described below

commit 841b5fafe13683389d078ec7a7adf736c05e038e
Author: Sebb <s...@apache.org>
AuthorDate: Tue Oct 3 22:10:45 2023 +0100

    IO-815: XmlStreamReader encoding RE is too strict
---
 src/changes/changes.xml                                        | 3 +++
 src/main/java/org/apache/commons/io/input/XmlStreamReader.java | 8 +++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b42898b0..49264bf1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -48,6 +48,9 @@ The <action> type attribute can be add,update,fix,remove.

   <body>
     <release version="2.14.1" date="202Y-MM-DD" description="Java 8 is required.">
+      <action dev="sebb" type="fix" issue="IO-810" due-to="Laurence Gonsalves">
+        XmlStreamReader encoding match RE is too strict
+      </action>
       <action dev="ggregory" type="fix" issue="IO-810" due-to="Gregor Dschung, Gary Gregory">
         Javadoc in FileUtils does not reflect code for thrown exceptions.
       </action>
diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index a38ef807..f50fc76e 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -209,8 +209,14 @@ public class XmlStreamReader extends Reader {
             // @formatter:off
             "^<\\?xml\\s+"
             + "version\\s*=\\s*(?:(?:\"1\\.[0-9]+\")|(?:'1.[0-9]+'))\\s+"
-            + "encoding\\s*=\\s*((?:\"[A-Za-z]([A-Za-z0-9\\._]|-)*\")|(?:'[A-Za-z]([A-Za-z0-9\\\\._]|-)*'))",
+            + "encoding\\s*=\\s*"
+            + "((?:\"[A-Za-z0-9][A-Za-z0-9._+:-]*\")" // double-quoted
+            + "|(?:'[A-Za-z0-9][A-Za-z0-9._+:-]*'))", // single-quoted
             Pattern.MULTILINE);
+    // N.B. the documented pattern is
+    // EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+    // However this does not match all the aliases that are supported by Java.
+    // e.g. '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'
     // @formatter:on

     private static final String RAW_EX_1 = "Illegal encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";