This is an automated email from the ASF dual-hosted git repository. davsclaus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/master by this push: new 8fa8bc9 CAMEL-12769: Combination of File consumer with charset and Split DSL with XPath doesn't parse XML correctly (#2505) 8fa8bc9 is described below commit 8fa8bc992a2d10a2efba7428da87cd79b7e08cd8 Author: Tadayoshi Sato <sato.tadayo...@gmail.com> AuthorDate: Tue Sep 4 23:36:31 2018 +0900 CAMEL-12769: Combination of File consumer with charset and Split DSL with XPath doesn't parse XML correctly (#2505) --- .../org/apache/camel/converter/IOConverter.java | 87 ++++++++++++++-------- .../apache/camel/converter/jaxp/XmlConverter.java | 12 ++- .../camel/converter/IOConverterCharsetTest.java | 18 ++--- 3 files changed, 75 insertions(+), 42 deletions(-) diff --git a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java index 073547e..ae02a2c 100644 --- a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java +++ b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java @@ -81,40 +81,18 @@ public final class IOConverter { return IOHelper.buffered(new FileInputStream(file)); } + /** + * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset + * + * @param file the file to be converted + * @param charset the charset the file is read with + * @return the input stream with the JVM default charset + */ public static InputStream toInputStream(File file, String charset) throws IOException { if (charset != null) { - final BufferedReader reader = toReader(file, charset); - final Charset defaultStreamCharset = defaultCharset.get(); - return new InputStream() { - private ByteBuffer bufferBytes; - private CharBuffer bufferedChars = CharBuffer.allocate(4096); - - @Override - public int read() throws IOException { - if (bufferBytes == null || bufferBytes.remaining() <= 0) { - bufferedChars.clear(); - int len = reader.read(bufferedChars); - bufferedChars.flip(); - if (len == -1) { - return -1; - } - bufferBytes = defaultStreamCharset.encode(bufferedChars); - } - return bufferBytes.get(); - } - - @Override - public void close() throws IOException { - reader.close(); - } - - @Override - public void reset() throws IOException { - reader.reset(); - } - }; + return new EncodingInputStream(file, charset); } else { - return IOHelper.buffered(new FileInputStream(file)); + return toInputStream(file); } } @@ -501,6 +479,53 @@ public final class IOConverter { } /** + * Encoding-aware input stream. + */ + public static class EncodingInputStream extends InputStream { + + private final File file; + private final BufferedReader reader; + private final Charset defaultStreamCharset; + + private ByteBuffer bufferBytes; + private CharBuffer bufferedChars = CharBuffer.allocate(4096); + + public EncodingInputStream(File file, String charset) throws IOException { + this.file = file; + reader = toReader(file, charset); + defaultStreamCharset = defaultCharset.get(); + } + + @Override + public int read() throws IOException { + if (bufferBytes == null || bufferBytes.remaining() <= 0) { + bufferedChars.clear(); + int len = reader.read(bufferedChars); + bufferedChars.flip(); + if (len == -1) { + return -1; + } + bufferBytes = defaultStreamCharset.encode(bufferedChars); + } + return bufferBytes.get(); + } + + @Override + public void close() throws IOException { + reader.close(); + } + + @Override + public void reset() throws IOException { + reader.reset(); + } + + public InputStream toOriginalInputStream() throws FileNotFoundException { + return new FileInputStream(file); + } + } + + /** * Encoding-aware file reader. */ private static class EncodingFileReader extends InputStreamReader { diff --git a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java index f8a8766..6d7c063 100644 --- a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java +++ b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java @@ -54,7 +54,6 @@ import javax.xml.transform.stax.StAXSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; -import org.apache.camel.util.StringHelper; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -70,8 +69,10 @@ import org.apache.camel.BytesSource; import org.apache.camel.Converter; import org.apache.camel.Exchange; import org.apache.camel.StringSource; +import org.apache.camel.converter.IOConverter; import org.apache.camel.util.IOHelper; import org.apache.camel.util.ObjectHelper; +import org.apache.camel.util.StringHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -870,7 +871,14 @@ public class XmlConverter { @Converter public Document toDOMDocument(InputStream in, Exchange exchange) throws IOException, SAXException, ParserConfigurationException { DocumentBuilder documentBuilder = createDocumentBuilder(getDocumentBuilderFactory(exchange)); - return documentBuilder.parse(in); + if (in instanceof IOConverter.EncodingInputStream) { + // DocumentBuilder detects encoding from XML declaration, so we need to + // revert the converted encoding for the input stream + IOConverter.EncodingInputStream encIn = (IOConverter.EncodingInputStream) in; + return documentBuilder.parse(encIn.toOriginalInputStream()); + } else { + return documentBuilder.parse(in); + } } /** diff --git a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java index 9d82ade..c192349 100644 --- a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java +++ b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java @@ -38,8 +38,8 @@ public class IOConverterCharsetTest extends ContextTestSupport { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); try (InputStream in = IOConverter.toInputStream(file, "UTF-8"); - BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); @@ -52,8 +52,8 @@ public class IOConverterCharsetTest extends ContextTestSupport { switchToDefaultCharset(StandardCharsets.ISO_8859_1); File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); try (InputStream in = IOConverter.toInputStream(file, "UTF-8"); - BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1)); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1)); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); @@ -66,8 +66,8 @@ public class IOConverterCharsetTest extends ContextTestSupport { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); - BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { + BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); @@ -80,7 +80,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); - InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) { + InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) { byte[] bytes = new byte[8192]; in.read(bytes); byte[] naiveBytes = new byte[8192]; @@ -93,7 +93,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToReaderFileWithCharsetUTF8() throws Exception { File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); try (BufferedReader reader = IOConverter.toReader(file, "UTF-8"); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); @@ -105,7 +105,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToReaderFileWithCharsetLatin1() throws Exception { File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); try (BufferedReader reader = IOConverter.toReader(file, "ISO-8859-1"); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line);