Author: sebb Date: Sat Mar 17 12:29:15 2012 New Revision: 1301928 URL: http://svn.apache.org/viewvc?rev=1301928&view=rev Log: CSV-67 UnicodeUnescapeReader should not be applied before parsing
Removed: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/UnicodeUnescapeReader.java Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1301928&r1=1301927&r2=1301928&view=diff ============================================================================== --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java (original) +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java Sat Mar 17 12:29:15 2012 @@ -38,7 +38,6 @@ public class CSVFormat implements Serial private final char escape; private final boolean leadingSpacesIgnored; private final boolean trailingSpacesIgnored; - private final boolean unicodeEscapesInterpreted; private final boolean emptyLinesIgnored; private final String lineSeparator; // for outputs private final String[] header; @@ -53,7 +52,7 @@ public class CSVFormat implements Serial static final char DISABLED = '\ufffe'; /** Standard comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. */ - public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null); + public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, true, CRLF, null); /** * Excel file format (using a comma as the value delimiter). @@ -66,10 +65,10 @@ public class CSVFormat implements Serial * * <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre> */ - public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, false, CRLF, null); + public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, CRLF, null); /** Tab-delimited format, with quote; leading and trailing spaces ignored. */ - public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null); + public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, true, CRLF, null); /** * Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and @@ -79,7 +78,7 @@ public class CSVFormat implements Serial * * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> */ - public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, false, "\n", null); + public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, "\n", null); /** @@ -91,7 +90,6 @@ public class CSVFormat implements Serial * @param escape the char used to escape special characters in values * @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces should be ignored * @param trailingSpacesIgnored <tt>true</tt> when trailing whitespaces should be ignored - * @param unicodeEscapesInterpreted <tt>true</tt> when unicode escapes should be interpreted * @param emptyLinesIgnored <tt>true</tt> when the parser should skip emtpy lines * @param lineSeparator the line separator to use for output * @param header the header @@ -103,7 +101,6 @@ public class CSVFormat implements Serial char escape, boolean leadingSpacesIgnored, boolean trailingSpacesIgnored, - boolean unicodeEscapesInterpreted, boolean emptyLinesIgnored, String lineSeparator, String[] header) { @@ -113,7 +110,6 @@ public class CSVFormat implements Serial this.escape = escape; this.leadingSpacesIgnored = leadingSpacesIgnored; this.trailingSpacesIgnored = trailingSpacesIgnored; - this.unicodeEscapesInterpreted = unicodeEscapesInterpreted; this.emptyLinesIgnored = emptyLinesIgnored; this.lineSeparator = lineSeparator; this.header = header; @@ -176,7 +172,7 @@ public class CSVFormat implements Serial throw new IllegalArgumentException("The delimiter cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -200,7 +196,7 @@ public class CSVFormat implements Serial throw new IllegalArgumentException("The encapsulator cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } boolean isEncapsulating() { @@ -228,7 +224,7 @@ public class CSVFormat implements Serial throw new IllegalArgumentException("The comment start character cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -261,7 +257,7 @@ public class CSVFormat implements Serial throw new IllegalArgumentException("The escape character cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } boolean isEscaping() { @@ -285,7 +281,7 @@ public class CSVFormat implements Serial * @return A copy of this format with the specified left trimming behavior. */ public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -305,7 +301,7 @@ public class CSVFormat implements Serial * @return A copy of this format with the specified right trimming behavior. */ public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -316,28 +312,7 @@ public class CSVFormat implements Serial * @return A copy of this format with the specified trimming behavior. */ public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); - } - - /** - * Tells if unicode escape sequences (e.g. {@literal \u1234}) are turned into their corresponding character - * when parsing input. - * - * @return <tt>true</tt> if unicode escape sequences are interpreted, <tt>false</tt> if they are left as is. - */ - public boolean isUnicodeEscapesInterpreted() { - return unicodeEscapesInterpreted; - } - - /** - * Returns a copy of this format with the specified unicode escaping behavior. - * - * @param unicodeEscapesInterpreted the escaping behavior, <tt>true</tt> to interpret unicode escape sequences, - * <tt>false</tt> to leave the escape sequences as is. - * @return A copy of this format with the specified unicode escaping behavior. - */ - public CSVFormat withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -357,7 +332,7 @@ public class CSVFormat implements Serial * @return A copy of this format with the specified empty line skipping behavior. */ public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** @@ -377,7 +352,7 @@ public class CSVFormat implements Serial * @return A copy of this format using the specified output line separator */ public CSVFormat withLineSeparator(String lineSeparator) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } String[] getHeader() { @@ -399,7 +374,7 @@ public class CSVFormat implements Serial * @return A copy of this format using the specified header */ public CSVFormat withHeader(String... header) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); } /** Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1301928&r1=1301927&r2=1301928&view=diff ============================================================================== --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java (original) +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java Sat Mar 17 12:29:15 2012 @@ -92,10 +92,6 @@ public class CSVParser implements Iterab public CSVParser(Reader input, CSVFormat format) throws IOException { format.validate(); - if (format.isUnicodeEscapesInterpreted()) { - input = new UnicodeUnescapeReader(input); - } - this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input)); initializeHeader(format); Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff ============================================================================== --- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java (original) +++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java Sat Mar 17 12:29:15 2012 @@ -30,7 +30,7 @@ public class CSVFormatTest { @Test public void testImmutalibity() { - CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null); + CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null); format.withDelimiter('?'); format.withEncapsulator('?'); @@ -40,7 +40,6 @@ public class CSVFormatTest { format.withLeadingSpacesIgnored(false); format.withTrailingSpacesIgnored(false); format.withEmptyLinesIgnored(false); - format.withUnicodeEscapesInterpreted(false); assertEquals('!', format.getDelimiter()); assertEquals('!', format.getEncapsulator()); @@ -51,12 +50,11 @@ public class CSVFormatTest { assertTrue(format.isLeadingSpacesIgnored()); assertTrue(format.isTrailingSpacesIgnored()); assertTrue(format.isEmptyLinesIgnored()); - assertTrue(format.isUnicodeEscapesInterpreted()); } @Test public void testMutators() { - CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null); + CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null); assertEquals('?', format.withDelimiter('?').getDelimiter()); assertEquals('?', format.withEncapsulator('?').getEncapsulator()); @@ -69,7 +67,6 @@ public class CSVFormatTest { assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored()); assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored()); assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored()); - assertFalse(format.withUnicodeEscapesInterpreted(false).isUnicodeEscapesInterpreted()); } @Test @@ -172,7 +169,6 @@ public class CSVFormatTest { assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart()); assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator()); assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape()); - assertEquals("unicode escape", CSVFormat.DEFAULT.isUnicodeEscapesInterpreted(), format.isUnicodeEscapesInterpreted()); assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored()); assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored()); assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored()); Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff ============================================================================== --- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java (original) +++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java Sat Mar 17 12:29:15 2012 @@ -283,7 +283,7 @@ public class CSVParserTest { }; - CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null); + CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null); CSVParser parser = new CSVParser(code, format); List<CSVRecord> records = parser.getRecords(); @@ -312,7 +312,7 @@ public class CSVParserTest { }; - CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null); + CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, "\r\n", null); CSVParser parser = new CSVParser(code, format); List<CSVRecord> records = parser.getRecords(); @@ -357,30 +357,6 @@ public class CSVParserTest { } @Test - public void testUnicodeEscape() throws Exception { - String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; - CSVParser parser = new CSVParser(code, CSVFormat.DEFAULT.withUnicodeEscapesInterpreted(true)); - final Iterator<CSVRecord> iterator = parser.iterator(); - CSVRecord record = iterator.next(); - assertEquals(2, record.size()); - assertEquals("abc", record.get(0)); - assertEquals("public", record.get(1)); - assertFalse("Should not have any more records", iterator.hasNext()); - } - - @Test - public void testUnicodeEscapeMySQL() throws Exception { - String code = "abc\t\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; - CSVParser parser = new CSVParser(code, CSVFormat.MYSQL.withUnicodeEscapesInterpreted(true)); - final Iterator<CSVRecord> iterator = parser.iterator(); - CSVRecord record = iterator.next(); - assertEquals(2, record.size()); - assertEquals("abc", record.get(0)); - assertEquals("public", record.get(1)); - assertFalse("Should not have any more records", iterator.hasNext()); - } - - @Test public void testCarriageReturnLineFeedEndings() throws IOException { String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; CSVParser parser = new CSVParser(new StringReader(code));