Repository: camel Updated Branches: refs/heads/master ef0433fb4 -> e2ca351c9
CAMEL-8356 IOConverter.toInputStream(file, charset) returns strange behaving stream Project: http://git-wip-us.apache.org/repos/asf/camel/repo Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/7f7513b9 Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/7f7513b9 Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/7f7513b9 Branch: refs/heads/master Commit: 7f7513b90a237e66b3a96e763097e6a99fe243af Parents: ef0433f Author: Stefan Mandel <mande...@gmail.com> Authored: Sat Feb 14 18:31:37 2015 +0100 Committer: Willem Jiang <willem.ji...@gmail.com> Committed: Wed Feb 25 10:58:18 2015 +0800 ---------------------------------------------------------------------- .../org/apache/camel/converter/IOConverter.java | 20 ++++- .../camel/converter/IOConverterCharsetTest.java | 77 ++++++++++++++++++++ .../camel/converter/german.iso-8859-1.txt | 1 + .../org/apache/camel/converter/german.utf-8.txt | 1 + 4 files changed, 97 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/camel/blob/7f7513b9/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java ---------------------------------------------------------------------- diff --git a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java index 05c49b4..835512d 100644 --- a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java +++ b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java @@ -39,6 +39,9 @@ import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; import java.nio.charset.UnsupportedCharsetException; import java.util.Properties; @@ -77,11 +80,24 @@ public final class IOConverter { public static InputStream toInputStream(File file, String charset) 
throws IOException { if (charset != null) { - final BufferedReader reader = toReader(file, charset); + final BufferedReader reader = toReader(file, charset); + final Charset defaultStreamCharset = Charset.forName("UTF-8"); return new InputStream() { + private ByteBuffer bufferBytes; + private CharBuffer bufferedChars = CharBuffer.allocate(4096); + @Override public int read() throws IOException { - return reader.read(); + if (bufferBytes == null || bufferBytes.remaining() <= 0) { + bufferedChars.clear(); + int len = reader.read(bufferedChars); + bufferedChars.flip(); + if (len == -1) { + return -1; + } + bufferBytes = defaultStreamCharset.encode(bufferedChars); + } + return bufferBytes.get(); } @Override http://git-wip-us.apache.org/repos/asf/camel/blob/7f7513b9/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java ---------------------------------------------------------------------- diff --git a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java new file mode 100644 index 0000000..672e84a --- /dev/null +++ b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java @@ -0,0 +1,77 @@ +package org.apache.camel.converter; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Arrays; + +import org.apache.camel.ContextTestSupport; + +public class IOConverterCharsetTest extends ContextTestSupport { + + public void testToInputStreamFileWithCharsetUTF8() throws Exception { + File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); + InputStream in = IOConverter.toInputStream(file, "UTF-8"); + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + 
String line = reader.readLine(); + String naiveLine = naiveReader.readLine(); + assertEquals(naiveLine, line); + assertEquals("Götzendämmerung,Joseph und seine Brüder", line); + reader.close(); + naiveReader.close(); + } + + public void testToInputStreamFileWithCharsetLatin1() throws Exception { + File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); + InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1")); + String line = reader.readLine(); + String naiveLine = naiveReader.readLine(); + assertEquals(naiveLine, line); + assertEquals("Götzendämmerung,Joseph und seine Brüder", line); + reader.close(); + naiveReader.close(); + } + + public void testToInputStreamFileDirectByteDumpWithCharsetLatin1() throws Exception { + File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); + InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); + InputStream naiveIn = new FileInputStream(file); + byte[] bytes = new byte[8192]; + in.read(bytes); + byte[] naiveBytes = new byte[8192]; + naiveIn.read(naiveBytes); + assertFalse("both input streams deliver the same byte sequence", Arrays.equals(naiveBytes, bytes)); + in.close(); + naiveIn.close(); + } + + public void testToReaderFileWithCharsetUTF8() throws Exception { + File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); + BufferedReader reader = IOConverter.toReader(file, "UTF-8"); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + String line = reader.readLine(); + String naiveLine = naiveReader.readLine(); + assertEquals(naiveLine, line); + assertEquals("Götzendämmerung,Joseph und seine Brüder", line); + reader.close(); + naiveReader.close(); + } + + public void 
testToReaderFileWithCharsetLatin1() throws Exception { + File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); + BufferedReader reader = IOConverter.toReader(file, "ISO-8859-1"); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1")); + String line = reader.readLine(); + String naiveLine = naiveReader.readLine(); + assertEquals(naiveLine, line); + assertEquals("Götzendämmerung,Joseph und seine Brüder", line); + reader.close(); + naiveReader.close(); + } + +} http://git-wip-us.apache.org/repos/asf/camel/blob/7f7513b9/camel-core/src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt ---------------------------------------------------------------------- diff --git a/camel-core/src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt b/camel-core/src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt new file mode 100644 index 0000000..5b57ac0 --- /dev/null +++ b/camel-core/src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt @@ -0,0 +1 @@ +Götzendämmerung,Joseph und seine Brüder \ No newline at end of file http://git-wip-us.apache.org/repos/asf/camel/blob/7f7513b9/camel-core/src/test/resources/org/apache/camel/converter/german.utf-8.txt ---------------------------------------------------------------------- diff --git a/camel-core/src/test/resources/org/apache/camel/converter/german.utf-8.txt b/camel-core/src/test/resources/org/apache/camel/converter/german.utf-8.txt new file mode 100644 index 0000000..6c603fd --- /dev/null +++ b/camel-core/src/test/resources/org/apache/camel/converter/german.utf-8.txt @@ -0,0 +1 @@ +Götzendämmerung,Joseph und seine Brüder \ No newline at end of file