This is an automated email from the ASF dual-hosted git repository. acosentino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/camel.git
commit 4344ab66b3d3256db3f097b7b553135592db4ea5 Author: Tadayoshi Sato <sato.tadayo...@gmail.com> AuthorDate: Tue May 14 20:42:20 2019 +0900 CAMEL-13136: File consumer with charset doesn't parse XML --- .../camel/component/file/GenericFileConverter.java | 115 +-------------------- .../org/apache/camel/converter/IOConverter.java | 23 +---- .../component/file/FileSplitXPathCharsetTest.java | 84 +++++++++++++++ .../IOHelperCharsetTest.java} | 18 ++-- .../file/FileSplitXPathCharsetTest-input.xml | 6 ++ .../main/java/org/apache/camel/util/IOHelper.java | 19 ++++ 6 files changed, 124 insertions(+), 141 deletions(-) diff --git a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java index b9cd8b8..706aaab 100644 --- a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java +++ b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java @@ -125,7 +125,7 @@ public final class GenericFileConverter { } else { LOG.debug("Read file {} (no charset)", f); } - return toInputStream(f, charset); + return IOHelper.toInputStream(f, charset); } } if (exchange != null) { @@ -143,7 +143,7 @@ public final class GenericFileConverter { // use reader first as it supports the file charset BufferedReader reader = genericFileToReader(file, exchange); if (reader != null) { - return toString(reader); + return IOHelper.toString(reader); } if (exchange != null) { // otherwise ensure the body is loaded as we want the content of the body @@ -184,120 +184,13 @@ public final class GenericFileConverter { String charset = file.getCharset(); if (charset != null) { LOG.debug("Read file {} with charset {}", f, file.getCharset()); - return toReader(f, charset); + return IOHelper.toReader(f, charset); } else { LOG.debug("Read file {} (no charset)", f); - return toReader(f, 
ExchangeHelper.getCharsetName(exchange)); + return IOHelper.toReader(f, ExchangeHelper.getCharsetName(exchange)); } } return null; } - private static BufferedReader toReader(File file, String charset) throws IOException { - FileInputStream in = new FileInputStream(file); - return IOHelper.buffered(new EncodingFileReader(in, charset)); - } - - private static InputStream toInputStream(File file, String charset) throws IOException { - if (charset != null) { - return new EncodingInputStream(file, charset); - } else { - return toInputStream(file); - } - } - - private static InputStream toInputStream(File file) throws IOException { - return IOHelper.buffered(new FileInputStream(file)); - } - - private static String toString(BufferedReader reader) throws IOException { - StringBuilder sb = new StringBuilder(1024); - char[] buf = new char[1024]; - try { - int len; - // read until we reach then end which is the -1 marker - while ((len = reader.read(buf)) != -1) { - sb.append(buf, 0, len); - } - } finally { - IOHelper.close(reader, "reader", LOG); - } - - return sb.toString(); - } - - /** - * Encoding-aware file reader. - */ - private static class EncodingFileReader extends InputStreamReader { - - private final FileInputStream in; - - /** - * @param in file to read - * @param charset character set to use - */ - EncodingFileReader(FileInputStream in, String charset) - throws FileNotFoundException, UnsupportedEncodingException { - super(in, charset); - this.in = in; - } - - @Override - public void close() throws IOException { - try { - super.close(); - } finally { - in.close(); - } - } - } - - /** - * Encoding-aware input stream. 
- */ - public static class EncodingInputStream extends InputStream { - - private final File file; - private final BufferedReader reader; - private final Charset defaultStreamCharset; - - private ByteBuffer bufferBytes; - private CharBuffer bufferedChars = CharBuffer.allocate(4096); - - public EncodingInputStream(File file, String charset) throws IOException { - this.file = file; - reader = toReader(file, charset); - defaultStreamCharset = defaultCharset.get(); - } - - @Override - public int read() throws IOException { - if (bufferBytes == null || bufferBytes.remaining() <= 0) { - bufferedChars.clear(); - int len = reader.read(bufferedChars); - bufferedChars.flip(); - if (len == -1) { - return -1; - } - bufferBytes = defaultStreamCharset.encode(bufferedChars); - } - return bufferBytes.get(); - } - - @Override - public void close() throws IOException { - reader.close(); - } - - @Override - public void reset() throws IOException { - reader.reset(); - } - - public InputStream toOriginalInputStream() throws FileNotFoundException { - return new FileInputStream(file); - } - } - } diff --git a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java index 7bd8590..8f99cbe 100644 --- a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java +++ b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java @@ -73,21 +73,6 @@ public final class IOConverter { return IOHelper.buffered(new FileInputStream(file)); } - /** - * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset - * - * @param file the file to be converted - * @param charset the charset the file is read with - * @return the input stream with the JVM default charset - */ - public static InputStream toInputStream(File file, String charset) throws IOException { - if (charset != null) { - return new IOHelper.EncodingInputStream(file, 
charset); - } else { - return toInputStream(file); - } - } - @Converter public static BufferedReader toReader(File file, Exchange exchange) throws IOException { return IOHelper.toReader(file, ExchangeHelper.getCharsetName(exchange)); @@ -106,15 +91,11 @@ public final class IOConverter { @Converter public static BufferedWriter toWriter(File file, Exchange exchange) throws IOException { FileOutputStream os = new FileOutputStream(file, false); - return toWriter(os, ExchangeHelper.getCharsetName(exchange)); + return IOHelper.toWriter(os, ExchangeHelper.getCharsetName(exchange)); } public static BufferedWriter toWriter(File file, boolean append, String charset) throws IOException { - return toWriter(new FileOutputStream(file, append), charset); - } - - public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException { - return IOHelper.buffered(new IOHelper.EncodingFileWriter(os, charset)); + return IOHelper.toWriter(new FileOutputStream(file, append), charset); } @Converter diff --git a/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java new file mode 100644 index 0000000..e61fe43 --- /dev/null +++ b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.component.file; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.apache.camel.ContextTestSupport; +import org.apache.camel.builder.RouteBuilder; +import org.apache.camel.component.mock.MockEndpoint; +import org.junit.BeforeClass; +import org.junit.Test; + +public class FileSplitXPathCharsetTest extends ContextTestSupport { + + private static final String TEST_DIR = "target/data/file-split-xpath-charset"; + + private static Path inputCsv = Paths.get(TEST_DIR, "input.csv"); + private static Path inputXml = Paths.get(TEST_DIR, "input.xml"); + + @BeforeClass + public static void clearInputFiles() throws IOException { + deleteDirectory(TEST_DIR); + } + + @Test + public void testCsv() throws Exception { + MockEndpoint out = getMockEndpoint("mock:result"); + out.expectedMessageCount(3); + out.expectedBodiesReceived("abc", "xyz", "åäö"); + + Files.write(inputCsv, "abc,xyz,åäö".getBytes(StandardCharsets.ISO_8859_1)); + + out.assertIsSatisfied(); + } + + @Test + public void testXml() throws Exception { + MockEndpoint out = getMockEndpoint("mock:result"); + out.expectedMessageCount(3); + out.expectedBodiesReceived("abc", "xyz", "åäö"); + + Files.copy( + getClass().getResourceAsStream("FileSplitXPathCharsetTest-input.xml"), + inputXml); + + out.assertIsSatisfied(); + } + + @Override + protected RouteBuilder createRouteBuilder() { + return new RouteBuilder() { + @Override + public void configure() { + // input: 
*.csv + fromF("file:%s?charset=ISO-8859-1&include=.*\\.csv", TEST_DIR) + .split().tokenize(",") + .to("mock:result"); + + // input: *.xml + fromF("file:%s?charset=ISO-8859-1&include=.*\\.xml", TEST_DIR) + .split().xpath("/foo/bar/text()") + .to("mock:result"); + } + }; + } +} diff --git a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java similarity index 91% rename from core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java rename to core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java index defdc00..e5b2958 100644 --- a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java +++ b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.camel.converter; +package org.apache.camel.util; import java.io.BufferedReader; import java.io.File; @@ -27,17 +27,17 @@ import java.nio.file.Paths; import java.util.Arrays; import org.apache.camel.ContextTestSupport; -import org.apache.camel.util.IOHelper; import org.junit.Test; -public class IOConverterCharsetTest extends ContextTestSupport { +public class IOHelperCharsetTest extends ContextTestSupport { + private static final String CONTENT = "G\u00f6tzend\u00e4mmerung,Joseph und seine Br\u00fcder"; @Test public void testToInputStreamFileWithCharsetUTF8() throws Exception { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); - try (InputStream in = IOConverter.toInputStream(file, "UTF-8"); + try (InputStream in = IOHelper.toInputStream(file, "UTF-8"); BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); BufferedReader naiveReader = new 
BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { String line = reader.readLine(); @@ -51,7 +51,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToInputStreamFileWithCharsetUTF8withOtherDefaultEncoding() throws Exception { switchToDefaultCharset(StandardCharsets.ISO_8859_1); File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt"); - try (InputStream in = IOConverter.toInputStream(file, "UTF-8"); + try (InputStream in = IOHelper.toInputStream(file, "UTF-8"); BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1)); BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) { String line = reader.readLine(); @@ -65,9 +65,9 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToInputStreamFileWithCharsetLatin1() throws Exception { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); - try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); + try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1"); BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); @@ -79,7 +79,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToInputStreamFileDirectByteDumpWithCharsetLatin1() throws 
Exception { switchToDefaultCharset(StandardCharsets.UTF_8); File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); - try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1"); + try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1"); InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) { byte[] bytes = new byte[8192]; in.read(bytes); @@ -105,7 +105,7 @@ public class IOConverterCharsetTest extends ContextTestSupport { public void testToReaderFileWithCharsetLatin1() throws Exception { File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt"); try (BufferedReader reader = IOHelper.toReader(file, "ISO-8859-1"); - BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) { + BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) { String line = reader.readLine(); String naiveLine = naiveReader.readLine(); assertEquals(naiveLine, line); diff --git a/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml new file mode 100644 index 0000000..5ab8ecc --- /dev/null +++ b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<foo> + <bar>abc</bar> + <bar>xyz</bar> + <bar>åäö</bar> +</foo> diff --git a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java index 0f883e8..ee52b06 100644 --- a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java +++ b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java @@ -617,8 
+617,27 @@ public final class IOHelper { } } + /** + * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset + * + * @param file the file to be converted + * @param charset the charset the file is read with + * @return the input stream with the JVM default charset + */ + public static InputStream toInputStream(File file, String charset) throws IOException { + if (charset != null) { + return new EncodingInputStream(file, charset); + } else { + return buffered(new FileInputStream(file)); + } + } + public static BufferedReader toReader(File file, String charset) throws IOException { FileInputStream in = new FileInputStream(file); return IOHelper.buffered(new EncodingFileReader(in, charset)); } + + public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException { + return IOHelper.buffered(new EncodingFileWriter(os, charset)); + } }