Repository: commons-io Updated Branches: refs/heads/master d3fc1a287 -> ed15b0998
IO-557: UnsupportedEncodingException when opening an ISO-8859-1 XML stream with Turkish as the default locale (closes #51) Perform locale independent upper case conversions. To handle properly lower cased character encoding name in XML prolog with any default system locale, notably Turkish. Project: http://git-wip-us.apache.org/repos/asf/commons-io/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-io/commit/d19259a7 Tree: http://git-wip-us.apache.org/repos/asf/commons-io/tree/d19259a7 Diff: http://git-wip-us.apache.org/repos/asf/commons-io/diff/d19259a7 Branch: refs/heads/master Commit: d19259a7774e23ebdaac4af5ee4def8602fe2969 Parents: d3fc1a2 Author: luccioman <luccio...@users.noreply.github.com> Authored: Mon Dec 18 09:37:06 2017 +0100 Committer: pascalschumacher <pascalschumac...@gmx.net> Committed: Sun Jan 14 11:39:38 2018 +0100 ---------------------------------------------------------------------- .../org/apache/commons/io/ByteOrderMark.java | 3 ++- .../apache/commons/io/input/XmlStreamReader.java | 4 ++-- .../commons/io/output/XmlStreamWriter.java | 3 ++- .../commons/io/input/XmlStreamReaderTest.java | 19 +++++++++++++++++++ .../io/input/compatibility/XmlStreamReader.java | 5 +++-- .../commons/io/output/XmlStreamWriterTest.java | 16 ++++++++++++++-- 6 files changed, 42 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/ByteOrderMark.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/ByteOrderMark.java b/src/main/java/org/apache/commons/io/ByteOrderMark.java index 109c402..6840e7e 100644 --- a/src/main/java/org/apache/commons/io/ByteOrderMark.java +++ b/src/main/java/org/apache/commons/io/ByteOrderMark.java @@ -17,6 +17,7 @@ package org.apache.commons.io; import java.io.Serializable; +import java.util.Locale; /** * Byte Order Mark 
(BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}. @@ -182,7 +183,7 @@ public class ByteOrderMark implements Serializable { builder.append(","); } builder.append("0x"); - builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase()); + builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT)); } builder.append(']'); return builder.toString(); http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/input/XmlStreamReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java index d1d24b3..f729b54 100644 --- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java +++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java @@ -683,7 +683,7 @@ public class XmlStreamReader extends Reader { final String postMime = httpContentType.substring(i + 1); final Matcher m = CHARSET_PATTERN.matcher(postMime); encoding = m.find() ? m.group(1) : null; - encoding = encoding != null ? encoding.toUpperCase(Locale.US) : null; + encoding = encoding != null ? 
encoding.toUpperCase(Locale.ROOT) : null; } } return encoding; @@ -741,7 +741,7 @@ public class XmlStreamReader extends Reader { } final Matcher m = ENCODING_PATTERN.matcher(prolog); if (m.find()) { - encoding = m.group(1).toUpperCase(); + encoding = m.group(1).toUpperCase(Locale.ROOT); encoding = encoding.substring(1, encoding.length() - 1); } } http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java index d352ad0..28fa6ac 100644 --- a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java +++ b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java @@ -24,6 +24,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.Writer; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -168,7 +169,7 @@ public class XmlStreamWriter extends Writer { final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0, xmlPrologEnd)); if (m.find()) { - encoding = m.group(1).toUpperCase(); + encoding = m.group(1).toUpperCase(Locale.ROOT); encoding = encoding.substring(1, encoding.length() - 1); } else { // no encoding found in XML prolog: using default http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java index 183e1a4..a968c56 100644 --- a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java +++ b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java @@ -276,6 +276,25 
@@ public class XmlStreamReaderTest { _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8"); _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8"); } + + /** + * Check lower case encoding names are properly handled. Should be successful + * with any system default locale, notably with Turkish language + * (-Duser.language=tr JVM parameter), which has specific rules to convert dotted and dotless + * i character. + */ + @Test + public void testLowerCaseEncoding() throws Exception { + final String[] encodings = { "iso8859-1", "us-ascii", "utf-8" }; + for (final String encoding : encodings) { + final String xml = getXML("no-bom", XML3, encoding, encoding); + try (final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding)); + final XmlStreamReader xmlReader = new XmlStreamReader(is);) { + assertTrue("Check encoding : " + encoding, encoding.equalsIgnoreCase(xmlReader.getEncoding())); + assertEquals("Check content", xml, IOUtils.toString(xmlReader)); + } + } + } @Test public void testRawContent() throws Exception { http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java b/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java index be5b121..5442315 100644 --- a/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java +++ b/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java @@ -29,6 +29,7 @@ import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.text.MessageFormat; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -590,7 +591,7 @@ public class XmlStreamReader extends Reader { final String 
postMime = httpContentType.substring(i + 1); final Matcher m = CHARSET_PATTERN.matcher(postMime); encoding = m.find() ? m.group(1) : null; - encoding = encoding != null ? encoding.toUpperCase() : null; + encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null; } } return encoding; @@ -699,7 +700,7 @@ public class XmlStreamReader extends Reader { } final Matcher m = ENCODING_PATTERN.matcher(prolog); if (m.find()) { - encoding = m.group(1).toUpperCase(); + encoding = m.group(1).toUpperCase(Locale.ROOT); encoding = encoding.substring(1, encoding.length() - 1); } } http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java index 0ee2507..b8f578c 100644 --- a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java +++ b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java @@ -16,7 +16,6 @@ */ package org.apache.commons.io.output; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -56,7 +55,7 @@ public class XmlStreamWriterTest { writer.write(xml); writer.close(); final byte[] xmlContent = out.toByteArray(); - assertEquals(encoding, writer.getEncoding()); + assertTrue(encoding.equalsIgnoreCase(writer.getEncoding())); assertTrue(Arrays.equals(xml.getBytes(encoding), xmlContent)); } @@ -102,6 +101,19 @@ public class XmlStreamWriterTest { checkXmlWriter(TEXT_UNICODE, null, "UTF-16BE"); checkXmlWriter(TEXT_UNICODE, null, "ISO-8859-1"); } + + /** + * Check lower case encoding names are properly handled. 
Should be successful + * with any system default locale, notably with Turkish language + * (-Duser.language=tr JVM parameter), which has specific rules to convert + * dotted and dotless i character. + */ + @Test + public void testLowerCaseEncoding() throws IOException { + checkXmlWriter(TEXT_UNICODE, "utf-8"); + checkXmlWriter(TEXT_LATIN1, "iso-8859-1"); + checkXmlWriter(TEXT_LATIN7, "iso-8859-7"); + } @Test public void testUTF8Encoding() throws IOException {