This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
commit 50e56c64b6c412d7885ef642b466d21d6aa1be53 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Mon Mar 11 14:08:27 2024 -0400 Internal refactoring for escape character --- .../java/org/apache/commons/csv/CSVFormat.java | 38 +++++++++++++++------- .../java/org/apache/commons/csv/CSVFormatTest.java | 3 ++ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index d7d5ccba..9ab21d5a 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -1717,6 +1717,15 @@ public final class CSVFormat implements Serializable { return escapeCharacter; } + /** + * Gets the escape character. + * + * @return the escape character, may be {@code 0} + */ + char getEscapeChar() { + return escapeCharacter != null ? escapeCharacter.charValue() : 0; + } + /** * Gets a copy of the header array. * @@ -2129,7 +2138,7 @@ public final class CSVFormat implements Serializable { } /* - * Note: Must only be called if escaping is enabled, otherwise will generate NPE. + * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
*/ private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { int start = 0; @@ -2137,18 +2146,20 @@ public final class CSVFormat implements Serializable { final int end = charSeq.length(); final char[] delim = getDelimiterCharArray(); final int delimLength = delim.length; - final char escape = getEscapeCharacter().charValue(); + final char escape = getEscapeChar(); while (pos < end) { char c = charSeq.charAt(pos); final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delim, delimLength); - if (c == CR || c == LF || c == escape || isDelimiterStart) { + final boolean isCr = c == CR; + final boolean isLf = c == LF; + if (isCr || isLf || c == escape || isDelimiterStart) { // write out segment up until this char if (pos > start) { appendable.append(charSeq, start, pos); } - if (c == LF) { + if (isLf) { c = 'n'; - } else if (c == CR) { + } else if (isCr) { c = 'r'; } appendable.append(escape); @@ -2172,6 +2183,9 @@ public final class CSVFormat implements Serializable { } } + /* + * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
+ */ private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { int start = 0; int pos = 0; @@ -2179,23 +2193,25 @@ public final class CSVFormat implements Serializable { final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); final char[] delim = getDelimiterCharArray(); final int delimLength = delim.length; - final char escape = getEscapeCharacter().charValue(); + final char escape = getEscapeChar(); final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); int c; while (EOF != (c = bufferedReader.read())) { builder.append((char) c); final boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.lookAhead(delimLength - 1)), pos, delim, delimLength); - if (c == CR || c == LF || c == escape || isDelimiterStart) { + final boolean isCr = c == CR; + final boolean isLf = c == LF; + if (isCr || isLf || c == escape || isDelimiterStart) { // write out segment up until this char if (pos > start) { append(builder.substring(start, pos), appendable); builder.setLength(0); pos = -1; } - if (c == LF) { + if (isLf) { c = 'n'; - } else if (c == CR) { + } else if (isCr) { c = 'r'; } append(escape, appendable); @@ -2232,7 +2248,7 @@ public final class CSVFormat implements Serializable { // If escape char not specified, default to the quote char // This avoids having to keep checking whether there is an escape character // at the cost of checking against quote twice - final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar; + final char escapeChar = isEscapeCharacterSet() ? 
getEscapeChar() : quoteChar; QuoteMode quoteModePolicy = getQuoteMode(); if (quoteModePolicy == null) { quoteModePolicy = QuoteMode.MINIMAL; @@ -2436,7 +2452,7 @@ public final class CSVFormat implements Serializable { final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; for (final String header : headers) { final boolean blank = isBlank(header); - // Sanitise all empty headers to the empty string "" when checking duplicates + // Sanitize all empty headers to the empty string "" when checking duplicates final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); if (containsHeader && !(blank && emptyDuplicatesAllowed)) { throw new IllegalArgumentException( diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 8bd8fed6..3220759e 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -393,6 +393,7 @@ public class CSVFormatTest { final CSVFormat csvFormatTwo = CSVFormat.MYSQL; assertEquals('\\', (char) csvFormatOne.getEscapeCharacter()); + assertEquals('\\', csvFormatOne.getEscapeChar()); assertNull(csvFormatOne.getQuoteMode()); assertTrue(csvFormatOne.getIgnoreEmptyLines()); @@ -426,6 +427,8 @@ public class CSVFormatTest { assertEquals(QuoteMode.ALL_NON_NULL, csvFormatTwo.getQuoteMode()); assertEquals('\t', csvFormatTwo.getDelimiter()); + assertArrayEquals(new char[] { '\t' }, csvFormatTwo.getDelimiterCharArray()); + assertEquals("\t", csvFormatTwo.getDelimiterString()); assertEquals("\n", csvFormatTwo.getRecordSeparator()); assertFalse(csvFormatTwo.isQuoteCharacterSet());