This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push: new 0b7a5a6a2ba CAMEL-19143: Added option to quote fields only when necessary (#9552) 0b7a5a6a2ba is described below commit 0b7a5a6a2ba62d135aa688f61b976f5983a211d6 Author: Brice Frisco <39070938+bricefri...@users.noreply.github.com> AuthorDate: Thu Mar 16 01:55:27 2023 -0500 CAMEL-19143: Added option to quote fields only when necessary (#9552) * CAMEL-19143: Added option to quote fields only when necessary * CAMEL-19143: Formatted file to comply with coding style * CAMEL-19143: Formatted file --- .../camel/dataformat/bindy/BindyCsvFactory.java | 58 ++-- .../dataformat/bindy/annotation/CsvRecord.java | 6 + .../BindySimpleCsvQuotingOnlyWhenNeededTest.java | 346 +++++++++++++++++++++ ...oubleQuotesInFieldCsvUnmarshallTest-context.xml | 10 +- ...oubleQuotesInFieldCsvUnmarshallTest-context.xml | 10 +- 5 files changed, 404 insertions(+), 26 deletions(-) diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/BindyCsvFactory.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/BindyCsvFactory.java index 15d81c85076..1949ae7d5ba 100644 --- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/BindyCsvFactory.java +++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/BindyCsvFactory.java @@ -73,6 +73,7 @@ public class BindyCsvFactory extends BindyAbstractFactory implements BindyFactor private boolean autospanLine; private boolean allowEmptyStream; private boolean quotingEscaped; + private boolean quotingOnlyWhenNeeded; private boolean endWithLineBreak; private boolean removeQuotes; @@ -312,6 +313,7 @@ public class BindyCsvFactory extends BindyAbstractFactory implements BindyFactor org.apache.camel.util.ObjectHelper.notNull(this.separator, "The separator has not been instantiated or property not defined in the @CsvRecord annotation"); + String carriageReturn = 
ConverterUtils.getStringCarriageReturn(getCarriageReturn()); char separator = ConverterUtils.getCharDelimiter(this.getSeparator()); if (LOG.isDebugEnabled()) { @@ -362,24 +364,27 @@ public class BindyCsvFactory extends BindyAbstractFactory implements BindyFactor l.add(temp); } - if (l != null) { - Iterator<List<String>> it = l.iterator(); - while (it.hasNext()) { - List<String> tokens = it.next(); - Iterator<String> itx = tokens.iterator(); - - while (itx.hasNext()) { - String res = itx.next(); - if (res != null) { - // the field may be enclosed in quotes if a quote was configured - if (quoting && quote != null) { - buffer.append(quote); - } + Iterator<List<String>> it = l.iterator(); + while (it.hasNext()) { + List<String> tokens = it.next(); + Iterator<String> itx = tokens.iterator(); + + while (itx.hasNext()) { + String res = itx.next(); + if (res != null) { + // RFC 4180 section 2.6 - fields containing line breaks, double + // quotes, and commas should be enclosed in double-quotes + boolean needsQuotes = quoting && quote != null && + (!quotingOnlyWhenNeeded || res.contains(carriageReturn) || res.indexOf(separator) != -1 + || res.contains(quote)); + + if (needsQuotes) { + buffer.append(quote); + // CAMEL-7519 - improvement escape the token itself by prepending escape char - if (quoting && quote != null && (res.contains("\\" + quote) || res.contains(quote)) && quotingEscaped) { + if (quotingEscaped && (res.contains("\\" + quote) || res.contains(quote))) { buffer.append(res.replaceAll("\\" + quote, "\\\\" + quote)); - } else if (quoting && quote != null && quote.equals(DOUBLE_QUOTES_SYMBOL) && res.contains(quote) - && !quotingEscaped) { + } else if (!quotingEscaped && quote.equals(DOUBLE_QUOTES_SYMBOL) && res.contains(quote)) { // If double-quotes are used to enclose fields, then a double-quote // appearing inside a field must be escaped by preceding it with another // double quote according to RFC 4180 section 2.7 @@ -387,20 +392,21 @@ public class 
BindyCsvFactory extends BindyAbstractFactory implements BindyFactor } else { buffer.append(res); } - if (quoting && quote != null) { - buffer.append(quote); - } - } - if (itx.hasNext()) { - buffer.append(separator); + buffer.append(quote); + } else { + buffer.append(res); } } - if (it.hasNext()) { - buffer.append(ConverterUtils.getStringCarriageReturn(getCarriageReturn())); + if (itx.hasNext()) { + buffer.append(separator); } } + + if (it.hasNext()) { + buffer.append(ConverterUtils.getStringCarriageReturn(getCarriageReturn())); + } } return buffer.toString(); @@ -673,6 +679,10 @@ public class BindyCsvFactory extends BindyAbstractFactory implements BindyFactor quotingEscaped = record.quotingEscaped(); LOG.debug("Escape quote character flag of the CSV: {}", quotingEscaped); + // Get quotingOnlyWhenNeeded parameter + quotingOnlyWhenNeeded = record.quotingOnlyWhenNeeded(); + LOG.debug("Quoting only when needed: {}", quotingOnlyWhenNeeded); + // Get endWithLineBreak parameter endWithLineBreak = record.endWithLineBreak(); LOG.debug("End with line break: {}", endWithLineBreak); diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/CsvRecord.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/CsvRecord.java index ff956adf59f..af4cb41abab 100644 --- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/CsvRecord.java +++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/CsvRecord.java @@ -92,6 +92,12 @@ public @interface CsvRecord { */ boolean quotingEscaped() default false; + /** + * Indicate if the values should be quoted only when needed (optional) - if enabled then the value is only quoted + * when it contains the configured separator, quote, or crlf characters. The quoting option must also be enabled. 
+ */ + boolean quotingOnlyWhenNeeded() default false; + /** * Last record spans rest of line (optional) - if enabled then the last column is auto spanned to end of line, for * example if its a comment, etc this allows the line to contain all characters, also the delimiter char. diff --git a/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/csv/BindySimpleCsvQuotingOnlyWhenNeededTest.java b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/csv/BindySimpleCsvQuotingOnlyWhenNeededTest.java new file mode 100644 index 00000000000..3051e076b2e --- /dev/null +++ b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/csv/BindySimpleCsvQuotingOnlyWhenNeededTest.java @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.dataformat.bindy.csv; + +import java.io.Serializable; + +import org.apache.camel.RoutesBuilder; +import org.apache.camel.builder.RouteBuilder; +import org.apache.camel.dataformat.bindy.annotation.CsvRecord; +import org.apache.camel.dataformat.bindy.annotation.DataField; +import org.apache.camel.test.junit5.CamelTestSupport; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class BindySimpleCsvQuotingOnlyWhenNeededTest extends CamelTestSupport { + @Test + public void testMarshalFieldQuotedWhenContainingDoubleQuote() { + BindyCsvRowFormat191431 body = new BindyCsvRowFormat191431(); + body.setFirstField("123"); + body.setSecondField("He said \"lets go to Hawaii!\""); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal1", body, String.class); + assertEquals("123,\"He said \"\"lets go to Hawaii!\"\"\",10.5,true\r\n", marshalled); + } + + @Test + public void testUnmarshalFieldWhenContainingDoubleQuote() { + String body = "123,\"He said \"\"lets go to Hawaii!\"\"\",10.5,true\r\n"; + BindyCsvRowFormat191431 unmarshalled = template.requestBody("direct:unmarshal1", body, BindyCsvRowFormat191431.class); + assertEquals("123", unmarshalled.getFirstField()); + assertEquals("He said \"lets go to Hawaii!\"", unmarshalled.getSecondField()); + assertEquals(10.5, unmarshalled.getaNumber()); + assertEquals(true, unmarshalled.getaBoolean()); + } + + @Test + public void testMarshalFieldQuotedWhenContainingOtherConfiguredQuoteCharacter() { + BindyCsvRowFormat191432 body = new BindyCsvRowFormat191432(); + body.setFirstField("123"); + body.setSecondField("He said 'lets go to Hawaii!'"); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal2", body, String.class); + assertEquals("123,'He said \\'lets go to Hawaii!\\'',10.5,true\r\n", marshalled); + } + + @Test + public void 
testUnmarshalFieldWhenContainingOtherConfiguredQuoteCharacter() { + String body = "123,'He said \\'lets go to Hawaii!\\'',10.5,true\r\n"; + BindyCsvRowFormat191432 unmarshalled = template.requestBody("direct:unmarshal2", body, BindyCsvRowFormat191432.class); + assertEquals("123", unmarshalled.getFirstField()); + assertEquals("He said 'lets go to Hawaii!'", unmarshalled.getSecondField()); + assertEquals(10.5, unmarshalled.getaNumber()); + assertEquals(true, unmarshalled.getaBoolean()); + } + + @Test + public void testMarshalFieldQuotedWhenContainingComma() { + BindyCsvRowFormat191431 body = new BindyCsvRowFormat191431(); + body.setFirstField("123"); + body.setSecondField("Then, lets go to Hawaii!"); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal1", body, String.class); + assertEquals("123,\"Then, lets go to Hawaii!\",10.5,true\r\n", marshalled); + } + + @Test + public void testUnmarshalFieldWhenContainingComma() { + String body = "123,\"Then, lets go to Hawaii!\",10.5,true\r\n"; + BindyCsvRowFormat191431 unmarshalled = template.requestBody("direct:unmarshal1", body, BindyCsvRowFormat191431.class); + assertEquals("123", unmarshalled.getFirstField()); + assertEquals("Then, lets go to Hawaii!", unmarshalled.getSecondField()); + assertEquals(10.5, unmarshalled.getaNumber()); + assertEquals(true, unmarshalled.getaBoolean()); + } + + @Test + public void testMarshalFieldQuotedWhenContainingOtherConfiguredSeparator() { + BindyCsvRowFormat191433 body = new BindyCsvRowFormat191433(); + body.setFirstField("123"); + body.setSecondField("Then; lets go to Hawaii!"); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal3", body, String.class); + assertEquals("123;\"Then; lets go to Hawaii!\";10.5;true\r\n", marshalled); + } + + @Test + public void testUnmarshalFieldWhenContainingOtherConfiguredSeparator() { + String body = "123;\"Then; lets go to 
Hawaii!\";10.5;true\r\n"; + BindyCsvRowFormat191433 unmarshalled = template.requestBody("direct:unmarshal3", body, BindyCsvRowFormat191433.class); + assertEquals("123", unmarshalled.getFirstField()); + assertEquals("Then; lets go to Hawaii!", unmarshalled.getSecondField()); + assertEquals(10.5, unmarshalled.getaNumber()); + assertEquals(true, unmarshalled.getaBoolean()); + } + + @Test + public void testMarshalFieldQuotedWhenContainingCrlf() { + BindyCsvRowFormat191431 body = new BindyCsvRowFormat191431(); + body.setFirstField("123"); + body.setSecondField("Then\r\n lets go to Hawaii!"); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal1", body, String.class); + assertEquals("123,\"Then\r\n lets go to Hawaii!\",10.5,true\r\n", marshalled); + } + + @Test + public void testMarshalFieldQuotedWhenContainingOtherConfiguredEscapeCharacter() { + BindyCsvRowFormat191434 body = new BindyCsvRowFormat191434(); + body.setFirstField("123"); + body.setSecondField("Then\n lets go to Hawaii!"); + body.setaNumber(10.5); + body.setaBoolean(true); + + String marshalled = template.requestBody("direct:marshal4", body, String.class); + assertEquals("123,\"Then\n lets go to Hawaii!\",10.5,true\n", marshalled); + } + + @Override + protected RoutesBuilder createRouteBuilder() { + return new RouteBuilder() { + @Override + public void configure() { + final BindyCsvDataFormat one = new BindyCsvDataFormat(BindyCsvRowFormat191431.class); + final BindyCsvDataFormat two = new BindyCsvDataFormat(BindyCsvRowFormat191432.class); + final BindyCsvDataFormat three = new BindyCsvDataFormat(BindyCsvRowFormat191433.class); + final BindyCsvDataFormat four = new BindyCsvDataFormat(BindyCsvRowFormat191434.class); + + from("direct:marshal1").marshal(one); + from("direct:marshal2").marshal(two); + from("direct:marshal3").marshal(three); + from("direct:marshal4").marshal(four); + + from("direct:unmarshal1").unmarshal(one); + 
from("direct:unmarshal2").unmarshal(two); + from("direct:unmarshal3").unmarshal(three); + from("direct:unmarshal4").unmarshal(four); + } + }; + } + + @CsvRecord(separator = ",", quoting = true, quotingOnlyWhenNeeded = true) + public static class BindyCsvRowFormat191431 implements Serializable { + private static final long serialVersionUID = 1L; + + @DataField(pos = 1) + private String firstField; + @DataField(pos = 2) + private String secondField; + @DataField(pos = 3, pattern = "#.##") + private Double aNumber; + @DataField(pos = 4) + private Boolean aBoolean; + + public String getFirstField() { + return firstField; + } + + public void setFirstField(String firstField) { + this.firstField = firstField; + } + + public String getSecondField() { + return secondField; + } + + public void setSecondField(String secondField) { + this.secondField = secondField; + } + + public Double getaNumber() { + return aNumber; + } + + public void setaNumber(Double aNumber) { + this.aNumber = aNumber; + } + + public Boolean getaBoolean() { + return aBoolean; + } + + public void setaBoolean(Boolean aBoolean) { + this.aBoolean = aBoolean; + } + } + + @CsvRecord(separator = ",", quoting = true, quote = "'", quotingEscaped = true, quotingOnlyWhenNeeded = true) + public static class BindyCsvRowFormat191432 implements Serializable { + private static final long serialVersionUID = 1L; + @DataField(pos = 1) + private String firstField; + @DataField(pos = 2) + private String secondField; + @DataField(pos = 3, pattern = "#.##") + private Double aNumber; + @DataField(pos = 4) + private Boolean aBoolean; + + public String getFirstField() { + return firstField; + } + + public void setFirstField(String firstField) { + this.firstField = firstField; + } + + public String getSecondField() { + return secondField; + } + + public void setSecondField(String secondField) { + this.secondField = secondField; + } + + public Double getaNumber() { + return aNumber; + } + + public void setaNumber(Double aNumber) { 
+ this.aNumber = aNumber; + } + + public Boolean getaBoolean() { + return aBoolean; + } + + public void setaBoolean(Boolean aBoolean) { + this.aBoolean = aBoolean; + } + } + + @CsvRecord(separator = ";", quoting = true, quotingOnlyWhenNeeded = true) + public static class BindyCsvRowFormat191433 implements Serializable { + private static final long serialVersionUID = 1L; + @DataField(pos = 1) + private String firstField; + @DataField(pos = 2) + private String secondField; + @DataField(pos = 3, pattern = "#.##") + private Double aNumber; + @DataField(pos = 4) + private Boolean aBoolean; + + public String getFirstField() { + return firstField; + } + + public void setFirstField(String firstField) { + this.firstField = firstField; + } + + public String getSecondField() { + return secondField; + } + + public void setSecondField(String secondField) { + this.secondField = secondField; + } + + public Double getaNumber() { + return aNumber; + } + + public void setaNumber(Double aNumber) { + this.aNumber = aNumber; + } + + public Boolean getaBoolean() { + return aBoolean; + } + + public void setaBoolean(Boolean aBoolean) { + this.aBoolean = aBoolean; + } + } + + @CsvRecord(separator = ",", quoting = true, quotingOnlyWhenNeeded = true, crlf = "UNIX") + public static class BindyCsvRowFormat191434 implements Serializable { + private static final long serialVersionUID = 1L; + @DataField(pos = 1) + private String firstField; + @DataField(pos = 2) + private String secondField; + @DataField(pos = 3, pattern = "#.##") + private Double aNumber; + @DataField(pos = 4) + private Boolean aBoolean; + + public String getFirstField() { + return firstField; + } + + public void setFirstField(String firstField) { + this.firstField = firstField; + } + + public String getSecondField() { + return secondField; + } + + public void setSecondField(String secondField) { + this.secondField = secondField; + } + + public Double getaNumber() { + return aNumber; + } + + public void setaNumber(Double 
aNumber) { + this.aNumber = aNumber; + } + + public Boolean getaBoolean() { + return aBoolean; + } + + public void setaBoolean(Boolean aBoolean) { + this.aBoolean = aBoolean; + } + } +} diff --git a/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindyDoubleQuotesInFieldCsvUnmarshallTest-context.xml b/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindyDoubleQuotesInFieldCsvUnmarshallTest-context.xml index 30c670dcdb8..62f06addd72 100644 --- a/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindyDoubleQuotesInFieldCsvUnmarshallTest-context.xml +++ b/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindyDoubleQuotesInFieldCsvUnmarshallTest-context.xml @@ -18,10 +18,18 @@ --> <beans xmlns="http://www.springframework.org/schema/beans" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation=" http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://camel.apache.org/schema/spring http://camel.apache.org/schema/spring/camel-spring.xsd"> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation=" + http://www.springframework.org/schema/beans + http://www.springframework.org/schema/beans/spring-beans.xsd + http://camel.apache.org/schema/spring + http://camel.apache.org/schema/spring/camel-spring.xsd"> + <camelContext id="camelContext-c82c721b-cdcb-4454-9994-26a6cded0e15" xmlns="http://camel.apache.org/schema/spring"> <routeBuilder ref="myBuilder"/> </camelContext> + <bean class="org.apache.camel.dataformat.bindy.csv.BindyDoubleQuotesInFieldCsvUnmarshallTest$ContextConfig" id="myBuilder"/> + </beans> diff --git a/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindySingleQuotesWithDoubleQuotesInFieldCsvUnmarshallTest-context.xml 
b/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindySingleQuotesWithDoubleQuotesInFieldCsvUnmarshallTest-context.xml index b4c657766b0..387097e1a0a 100644 --- a/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindySingleQuotesWithDoubleQuotesInFieldCsvUnmarshallTest-context.xml +++ b/components/camel-bindy/src/test/resources/org/apache/camel/dataformat/bindy/csv/BindySingleQuotesWithDoubleQuotesInFieldCsvUnmarshallTest-context.xml @@ -18,10 +18,18 @@ --> <beans xmlns="http://www.springframework.org/schema/beans" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation=" http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd http://camel.apache.org/schema/spring http://camel.apache.org/schema/spring/camel-spring.xsd"> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation=" + http://www.springframework.org/schema/beans + http://www.springframework.org/schema/beans/spring-beans.xsd + http://camel.apache.org/schema/spring + http://camel.apache.org/schema/spring/camel-spring.xsd"> + <camelContext id="camelContext-8b6febc1-7092-4b97-8847-38f4482d2ca2" xmlns="http://camel.apache.org/schema/spring"> <routeBuilder ref="myBuilder"/> </camelContext> + <bean class="org.apache.camel.dataformat.bindy.csv.BindySingleQuotesWithDoubleQuotesInFieldCsvUnmarshallTest$ContextConfig" id="myBuilder"/> + </beans>