Repository: commons-io
Updated Branches:
  refs/heads/master d3fc1a287 -> ed15b0998


IO-557: UnsupportedEncodingException when opening an ISO-8859-1 XML stream with 
Turkish as the default locale (closes #51)

Perform locale independent upper case conversions.

This ensures that lower-cased character encoding names in the XML prolog
are handled properly under any default system locale, notably Turkish.


Project: http://git-wip-us.apache.org/repos/asf/commons-io/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-io/commit/d19259a7
Tree: http://git-wip-us.apache.org/repos/asf/commons-io/tree/d19259a7
Diff: http://git-wip-us.apache.org/repos/asf/commons-io/diff/d19259a7

Branch: refs/heads/master
Commit: d19259a7774e23ebdaac4af5ee4def8602fe2969
Parents: d3fc1a2
Author: luccioman <luccio...@users.noreply.github.com>
Authored: Mon Dec 18 09:37:06 2017 +0100
Committer: pascalschumacher <pascalschumac...@gmx.net>
Committed: Sun Jan 14 11:39:38 2018 +0100

----------------------------------------------------------------------
 .../org/apache/commons/io/ByteOrderMark.java     |  3 ++-
 .../apache/commons/io/input/XmlStreamReader.java |  4 ++--
 .../commons/io/output/XmlStreamWriter.java       |  3 ++-
 .../commons/io/input/XmlStreamReaderTest.java    | 19 +++++++++++++++++++
 .../io/input/compatibility/XmlStreamReader.java  |  5 +++--
 .../commons/io/output/XmlStreamWriterTest.java   | 16 ++++++++++++++--
 6 files changed, 42 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/ByteOrderMark.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/ByteOrderMark.java 
b/src/main/java/org/apache/commons/io/ByteOrderMark.java
index 109c402..6840e7e 100644
--- a/src/main/java/org/apache/commons/io/ByteOrderMark.java
+++ b/src/main/java/org/apache/commons/io/ByteOrderMark.java
@@ -17,6 +17,7 @@
 package org.apache.commons.io;
 
 import java.io.Serializable;
+import java.util.Locale;
 
 /**
  * Byte Order Mark (BOM) representation - see {@link 
org.apache.commons.io.input.BOMInputStream}.
@@ -182,7 +183,7 @@ public class ByteOrderMark implements Serializable {
                 builder.append(",");
             }
             builder.append("0x");
-            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase());
+            builder.append(Integer.toHexString(0xFF & 
bytes[i]).toUpperCase(Locale.ROOT));
         }
         builder.append(']');
         return builder.toString();

http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java 
b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index d1d24b3..f729b54 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -683,7 +683,7 @@ public class XmlStreamReader extends Reader {
                 final String postMime = httpContentType.substring(i + 1);
                 final Matcher m = CHARSET_PATTERN.matcher(postMime);
                 encoding = m.find() ? m.group(1) : null;
-                encoding = encoding != null ? encoding.toUpperCase(Locale.US) 
: null;
+                encoding = encoding != null ? 
encoding.toUpperCase(Locale.ROOT) : null;
             }
         }
         return encoding;
@@ -741,7 +741,7 @@ public class XmlStreamReader extends Reader {
                 }
                 final Matcher m = ENCODING_PATTERN.matcher(prolog);
                 if (m.find()) {
-                    encoding = m.group(1).toUpperCase();
+                    encoding = m.group(1).toUpperCase(Locale.ROOT);
                     encoding = encoding.substring(1, encoding.length() - 1);
                 }
             }

http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java 
b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
index d352ad0..28fa6ac 100644
--- a/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
+++ b/src/main/java/org/apache/commons/io/output/XmlStreamWriter.java
@@ -24,6 +24,7 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.io.Writer;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -168,7 +169,7 @@ public class XmlStreamWriter extends Writer {
                     final Matcher m = 
ENCODING_PATTERN.matcher(xmlProlog.substring(0,
                             xmlPrologEnd));
                     if (m.find()) {
-                        encoding = m.group(1).toUpperCase();
+                        encoding = m.group(1).toUpperCase(Locale.ROOT);
                         encoding = encoding.substring(1, encoding.length() - 
1);
                     } else {
                         // no encoding found in XML prolog: using default

http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java 
b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
index 183e1a4..a968c56 100644
--- a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
+++ b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
@@ -276,6 +276,25 @@ public class XmlStreamReaderTest {
         _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", 
"UTF-8", "UTF-8");
         _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", 
"UTF-8", "UTF-8");
     }
+    
+    /**
+     * Check lower case encoding names are properly handled. Should be 
successful
+     * with any system default locale, notably with Turkish language
+     * (-Duser.language=tr JVM parameter), which has specific rules to convert 
dotted and dotless
+     * i characters.
+     */
+    @Test
+    public void testLowerCaseEncoding() throws Exception {
+        final String[] encodings = { "iso8859-1", "us-ascii", "utf-8" };
+        for (final String encoding : encodings) {
+            final String xml = getXML("no-bom", XML3, encoding, encoding);
+            try (final ByteArrayInputStream is = new 
ByteArrayInputStream(xml.getBytes(encoding));
+                    final XmlStreamReader xmlReader = new 
XmlStreamReader(is);) {
+                assertTrue("Check encoding : " + encoding, 
encoding.equalsIgnoreCase(xmlReader.getEncoding()));
+                assertEquals("Check content", xml, 
IOUtils.toString(xmlReader));
+            }
+        }
+    }
 
     @Test
     public void testRawContent() throws Exception {

http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java 
b/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
index be5b121..5442315 100644
--- 
a/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
+++ 
b/src/test/java/org/apache/commons/io/input/compatibility/XmlStreamReader.java
@@ -29,6 +29,7 @@ import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLConnection;
 import java.text.MessageFormat;
+import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -590,7 +591,7 @@ public class XmlStreamReader extends Reader {
                 final String postMime = httpContentType.substring(i + 1);
                 final Matcher m = CHARSET_PATTERN.matcher(postMime);
                 encoding = m.find() ? m.group(1) : null;
-                encoding = encoding != null ? encoding.toUpperCase() : null;
+                encoding = encoding != null ? 
encoding.toUpperCase(Locale.ROOT) : null;
             }
         }
         return encoding;
@@ -699,7 +700,7 @@ public class XmlStreamReader extends Reader {
                 }
                 final Matcher m = ENCODING_PATTERN.matcher(prolog);
                 if (m.find()) {
-                    encoding = m.group(1).toUpperCase();
+                    encoding = m.group(1).toUpperCase(Locale.ROOT);
                     encoding = encoding.substring(1, encoding.length() - 1);
                 }
             }

http://git-wip-us.apache.org/repos/asf/commons-io/blob/d19259a7/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java 
b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
index 0ee2507..b8f578c 100644
--- a/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
+++ b/src/test/java/org/apache/commons/io/output/XmlStreamWriterTest.java
@@ -16,7 +16,6 @@
  */
 package org.apache.commons.io.output;
 
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayOutputStream;
@@ -56,7 +55,7 @@ public class XmlStreamWriterTest {
         writer.write(xml);
         writer.close();
         final byte[] xmlContent = out.toByteArray();
-        assertEquals(encoding, writer.getEncoding());
+        assertTrue(encoding.equalsIgnoreCase(writer.getEncoding()));
         assertTrue(Arrays.equals(xml.getBytes(encoding), xmlContent));
 
     }
@@ -102,6 +101,19 @@ public class XmlStreamWriterTest {
         checkXmlWriter(TEXT_UNICODE, null, "UTF-16BE");
         checkXmlWriter(TEXT_UNICODE, null, "ISO-8859-1");
     }
+    
+    /**
+     * Check lower case encoding names are properly handled. Should be 
successful
+     * with any system default locale, notably with Turkish language
+     * (-Duser.language=tr JVM parameter), which has specific rules to convert
+     * dotted and dotless i characters.
+     */
+    @Test
+    public void testLowerCaseEncoding() throws IOException {
+        checkXmlWriter(TEXT_UNICODE, "utf-8");
+        checkXmlWriter(TEXT_LATIN1, "iso-8859-1");
+        checkXmlWriter(TEXT_LATIN7, "iso-8859-7");
+    }
 
     @Test
     public void testUTF8Encoding() throws IOException {

Reply via email to