Author: yonik Date: Wed Jun 16 16:12:34 2010 New Revision: 955284 URL: http://svn.apache.org/viewvc?rev=955284&view=rev Log: SANDBOX-322: CSVPrinter overhaul
Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java (original) +++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java Wed Jun 16 16:12:34 2010 @@ -70,8 +70,7 @@ public class CSVParser { // the input stream private final ExtendedBufferedReader in; - // TODO: this can be made final if setStrategy is removed - private CSVStrategy strategy; + private final CSVStrategy strategy; // the following objects are shared to reduce garbage /** A record buffer for getLine(). Grows as necessary and is reused. 
*/ @@ -346,7 +345,7 @@ public class CSVParser { // important: make sure a new char gets consumed in each iteration while (!tkn.isReady) { // ignore whitespaces at beginning of a token - while (isWhitespace(c) && !eol) { + while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) { wsBuf.append((char) c); c = in.read(); eol = isEndOfLine(c); @@ -561,18 +560,7 @@ public class CSVParser { // ====================================================== /** - * Sets the specified CSV Strategy - * - * @return current instance of CSVParser to allow chained method calls - * @deprecated the strategy should be set in the constructor {@link #CSVParser(Reader,CSVStrategy)}. - */ - public CSVParser setStrategy(CSVStrategy strategy) { - this.strategy = strategy; - return this; - } - - /** - * Obtain the specified CSV Strategy + * Obtain the specified CSV Strategy. This should not be modified. * * @return strategy currently being used */ Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java (original) +++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java Wed Jun 16 16:12:34 2010 @@ -16,6 +16,7 @@ */ package org.apache.commons.csv; +import java.io.IOException; import java.io.OutputStream; import java.io.PrintWriter; import java.io.Writer; @@ -26,63 +27,27 @@ import java.io.Writer; public class CSVPrinter { /** The place that the values get written. */ - protected PrintWriter out; + protected final Writer out; + protected final CSVStrategy strategy; /** True if we just began a new line. 
*/ protected boolean newLine = true; - private CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; + protected char[] buf = new char[0]; // temporary buffer /** * Create a printer that will print values to the given - * stream. Character to byte conversion is done using - * the default character encoding. Comments will be - * written using the default comment character '#'. + * stream following the CSVStrategy. * - * @param out stream to which to print. - */ - public CSVPrinter(OutputStream out) { - this.out = new PrintWriter(out); - } - - - /** - * Create a printer that will print values to the given - * stream. Comments will be - * written using the default comment character '#'. + * Currently, only a pure encapsulation strategy or a pure escaping strategy + * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported. * * @param out stream to which to print. + * @param strategy describes the CSV variation. */ - public CSVPrinter(Writer out) { - if (out instanceof PrintWriter) { - this.out = (PrintWriter) out; - } else { - this.out = new PrintWriter(out); - } - } - - - // ====================================================== - // strategies - // ====================================================== - - /** - * Sets the specified CSV Strategy - * - * @return current instance of CSVParser to allow chained method calls - */ - public CSVPrinter setStrategy(CSVStrategy strategy) { - this.strategy = strategy; - return this; - } - - /** - * Obtain the specified CSV Strategy - * - * @return strategy currently being used - */ - public CSVStrategy getStrategy() { - return this.strategy; + public CSVPrinter(Writer out, CSVStrategy strategy) { + this.out = out; + this.strategy = strategy==null ? 
CSVStrategy.DEFAULT_STRATEGY : strategy; } // ====================================================== @@ -90,26 +55,15 @@ public class CSVPrinter { // ====================================================== /** - * Print the string as the last value on the line. The value - * will be quoted if needed. - * - * @param value value to be outputted. + * Output a blank line */ - public void println(String value) { - print(value); - out.println(); - out.flush(); + public void println() throws IOException { + out.write(strategy.getPrinterNewline()); newLine = true; } - - /** - * Output a blank line - */ - public void println() { - out.println(); + public void flush() throws IOException { out.flush(); - newLine = true; } @@ -120,32 +74,11 @@ public class CSVPrinter { * * @param values values to be outputted. */ - public void println(String[] values) { + public void println(String[] values) throws IOException { for (int i = 0; i < values.length; i++) { print(values[i]); } - out.println(); - out.flush(); - newLine = true; - } - - - /** - * Print several lines of comma separated values. - * The values will be quoted if needed. Quotes and - * newLine characters will be escaped. - * - * @param values values to be outputted. 
- */ - public void println(String[][] values) { - for (int i = 0; i < values.length; i++) { - println(values[i]); - } - if (values.length == 0) { - out.println(); - } - out.flush(); - newLine = true; + println(); } @@ -158,15 +91,15 @@ public class CSVPrinter { * * @param comment the comment to output */ - public void printlnComment(String comment) { + public void printlnComment(String comment) throws IOException { if(this.strategy.isCommentingDisabled()) { return; } if (!newLine) { - out.println(); + println(); } - out.print(this.strategy.getCommentStart()); - out.print(' '); + out.write(this.strategy.getCommentStart()); + out.write(' '); for (int i = 0; i < comment.length(); i++) { char c = comment.charAt(i); switch (c) { @@ -176,120 +109,201 @@ public class CSVPrinter { } // break intentionally excluded. case '\n' : - out.println(); - out.print(this.strategy.getCommentStart()); - out.print(' '); + println(); + out.write(this.strategy.getCommentStart()); + out.write(' '); break; default : - out.print(c); + out.write(c); break; } } - out.println(); - out.flush(); - newLine = true; + println(); } - /** - * Print the string as the next value on the line. The value - * will be quoted if needed. - * - * @param value value to be outputted. 
- */ - public void print(String value) { - boolean quote = false; - if (value.length() > 0) { - char c = value.charAt(0); - if (newLine - && (c < '0' - || (c > '9' && c < 'A') - || (c > 'Z' && c < 'a') - || (c > 'z'))) { - quote = true; - } - if (c == ' ' || c == '\f' || c == '\t') { - quote = true; + public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException { + if (!checkForEscape) { + if (newLine) { + newLine = false; + } else { + out.write(this.strategy.getDelimiter()); } - for (int i = 0; i < value.length(); i++) { - c = value.charAt(i); - if (c == '"' || c == this.strategy.getDelimiter() || c == '\n' || c == '\r') { - quote = true; - c = value.charAt( value.length() - 1 ); - break; + out.write(value, offset, len); + return; + } + + if (strategy.getEncapsulator() != (char)-2) { + printAndEncapsulate(value, offset, len); + } else if (strategy.getEscape() != (char)-2) { + printAndEscape(value, offset, len); + } else { + out.write(value, offset, len); + } + } + + void printSep() throws IOException { + if (newLine) { + newLine = false; + } else { + out.write(this.strategy.getDelimiter()); + } + } + + void printAndEscape(char[] value, int offset, int len) throws IOException { + int start = offset; + int pos = offset; + int end = offset + len; + + char delim = this.strategy.getDelimiter(); + char escape = this.strategy.getEscape(); + + printSep(); + + while (pos < end) { + char c = value[pos]; + if (c == '\r' || c=='\n' || c==delim || c==escape) { + // write out segment up until this char + int l = pos-start; + if (l>0) { + out.write(value, start, l); } + if (c=='\n') c='n'; + else if (c=='\r') c='r'; + + out.write(escape); + out.write(c); + + start = pos+1; // start on the current char after this one } - if (c == ' ' || c == '\f' || c == '\t') { - quote = true; - } - } else if (newLine) { + + pos++; + } + + // write last segment + int l = pos-start; + if (l>0) { + out.write(value, start, l); + } + } + + void 
printAndEncapsulate(char[] value, int offset, int len) throws IOException { + boolean first = newLine; // is this the first value on this line? + boolean quote = false; + int start = offset; + int pos = offset; + int end = offset + len; + + char delim = this.strategy.getDelimiter(); + char encapsulator = this.strategy.getEncapsulator(); + + printSep(); + + if (len <= 0) { // always quote an empty token that is the first // on the line, as it may be the only thing on the // line. If it were not quoted in that case, // an empty line has no tokens. - quote = true; - } - if (newLine) { - newLine = false; + if (first) { + quote = true; + } } else { - out.print(this.strategy.getDelimiter()); + char c = value[pos]; + + // Hmmm, where did this rule come from? + if (first + && (c < '0' + || (c > '9' && c < 'A') + || (c > 'Z' && c < 'a') + || (c > 'z'))) { + quote = true; + // } else if (c == ' ' || c == '\f' || c == '\t') { + } else if (c <= '#') { + // Some other chars at the start of a value caused the parser to fail, so for now + // encapsulate if we start in anything less than '#'. We are being conservative + // by including the default comment char too. 
+ quote = true; + } else { + while (pos < end) { + c = value[pos]; + if (c=='\n' || c=='\r' || c==encapsulator || c==delim) { + quote = true; + break; + } + pos++; + } + + if (!quote) { + pos = end-1; + c = value[pos]; + // if (c == ' ' || c == '\f' || c == '\t') { + // Some other chars at the end caused the parser to fail, so for now + // encapsulate if we end in anything less than ' ' + if (c <= ' ') { + quote = true; + } + } + } } - if (quote) { - out.print(escapeAndQuote(value)); - } else { - out.print(value); + + if (!quote) { + // no encapsulation needed - write out the original value + out.write(value, offset, len); + return; + } + + // we hit something that needed encapsulation + out.write(encapsulator); + + // Pick up where we left off: pos should be positioned on the first character that caused + // the need for encapsulation. + while (pos<end) { + char c = value[pos]; + if (c==encapsulator) { + // write out the chunk up until this point + + // add 1 to the length to write out the encapsulator also + out.write(value, start, pos-start+1); + // put the next starting position on the encapsulator so we will + // write it out again with the next string (effectively doubling it) + start = pos; + } + pos++; } - out.flush(); - } + // write the last segment + out.write(value, start, pos-start); + out.write(encapsulator); + } /** - * Enclose the value in quotes and escape the quote - * and comma characters that are inside. + * Print the string as the next value on the line. The value + * will be escaped or encapsulated as needed if checkForEscape==true * - * @param value needs to be escaped and quoted - * @return the value, escaped and quoted + * @param value value to be outputted. 
*/ - private String escapeAndQuote(String value) { - // the initial count is for the preceding and trailing quotes - int count = 2; - for (int i = 0; i < value.length(); i++) { - switch (value.charAt(i)) { - case '\"' : - case '\n' : - case '\r' : - case '\\' : - count++; - break; - default: - break; - } + public void print(String value, boolean checkForEscape) throws IOException { + if (!checkForEscape) { + // write directly from string + out.write(value); + return; } - StringBuffer sb = new StringBuffer(value.length() + count); - sb.append(strategy.getEncapsulator()); - for (int i = 0; i < value.length(); i++) { - char c = value.charAt(i); - - if (c == strategy.getEncapsulator()) { - sb.append('\\').append(c); - continue; - } - switch (c) { - case '\n' : - sb.append("\\n"); - break; - case '\r' : - sb.append("\\r"); - break; - case '\\' : - sb.append("\\\\"); - break; - default : - sb.append(c); - } + + if (buf.length < value.length()) { + buf = new char[value.length()]; } - sb.append(strategy.getEncapsulator()); - return sb.toString(); + + value.getChars(0, value.length(), buf, 0); + print(buf, 0, value.length(), checkForEscape); } + /** + * Print the string as the next value on the line. The value + * will be escaped or encapsulated as needed. + * + * @param value value to be outputted. 
+ */ + public void print(String value) throws IOException { + print(value, true); + } } Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java (original) +++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java Wed Jun 16 16:12:34 2010 @@ -34,12 +34,16 @@ public class CSVStrategy implements Clon private boolean interpretUnicodeEscapes; private boolean ignoreEmptyLines; + // controls for output + private String printerNewline = "\n"; + // -2 is used to signal disabled, because it won't be confused with // an EOF signal (-1), and because \ufffe in UTF-16 would be // encoded as two chars (using surrogates) and thus there should never // be a collision with a real text char. 
public static char COMMENTS_DISABLED = (char)-2; public static char ESCAPE_DISABLED = (char)-2; + public static char ENCAPSULATOR_DISABLED = (char)-2; public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true); @@ -98,7 +102,6 @@ public class CSVStrategy implements Clon true, interpretUnicodeEscapes, ignoreEmptyLines); } - public void setDelimiter(char delimiter) { this.delimiter = delimiter; } public char getDelimiter() { return this.delimiter; } @@ -130,6 +133,13 @@ public class CSVStrategy implements Clon public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; } public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; } + public void setPrinterNewline(String newline) { + this.printerNewline = newline; + } + public String getPrinterNewline() { + return this.printerNewline; + } + public Object clone() { try { return super.clone(); Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java (original) +++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java Wed Jun 16 16:12:34 2010 @@ -48,10 +48,10 @@ public class CSVUtils { * @return the CSV string, will be an empty string if the length of the * value array is 0 */ - public static String printLine(String[] values) { + public static String printLine(String[] values, CSVStrategy strategy) { // set up a CSVUtils StringWriter stringWriter = new StringWriter(); - CSVPrinter csvPrinter = new CSVPrinter(stringWriter); + CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy); // check for null values an "null" as strings and convert them // into the 
strings "null" and "\"null\"" @@ -64,8 +64,11 @@ public class CSVUtils { } // convert to CSV - csvPrinter.println(values); - + try { + csvPrinter.println(values); + } catch (IOException e) { + // should not happen with StringWriter + } // as the resulting string has \r\n at the end, we will trim that away return stringWriter.toString().trim(); } Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java (original) +++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java Wed Jun 16 16:12:34 2010 @@ -45,6 +45,10 @@ public class CSVParserTest extends TestC TestCSVParser(Reader in) { super(in); } + + TestCSVParser(Reader in, CSVStrategy strategy) { + super(in, strategy); + } /** * Calls super.nextToken() and prints out a String representation of token * type and content. 
@@ -65,7 +69,6 @@ public class CSVParserTest extends TestC public void testNextToken1() throws IOException { String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken()); @@ -88,10 +91,13 @@ public class CSVParserTest extends TestC * */ String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.getStrategy().setIgnoreEmptyLines(false); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); - parser.getStrategy().setCommentStart('#'); + CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); + // strategy.setIgnoreEmptyLines(false); + strategy.setCommentStart('#'); + + TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); + + assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken()); @@ -114,9 +120,10 @@ public class CSVParserTest extends TestC * \,, */ String code = "a,\\,,b\n\\,,"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); - parser.getStrategy().setCommentStart('#'); + CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); + strategy.setCommentStart('#'); + TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); // an unquoted single backslash is not an escape char assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken()); @@ -138,7 +145,6 @@ public class CSVParserTest extends TestC String code = "a,\"foo\",b\na, \" 
foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken()); assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); @@ -159,7 +165,6 @@ public class CSVParserTest extends TestC String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken()); assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); @@ -177,8 +182,7 @@ public class CSVParserTest extends TestC * ;; */ String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy( new CSVStrategy(';', '\'', '!') ); + TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!')); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals( CSVParser.TT_EORECORD + ";b and ' more\n;", @@ -265,8 +269,7 @@ public class CSVParserTest extends TestC {""}, {"world", ""} }; - CSVParser parser = new CSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); assertTrue(tmp.length > 0); @@ -294,8 +297,7 @@ public class CSVParserTest extends TestC String code; for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); + CSVParser parser = new CSVParser(new 
StringReader(code), CSVStrategy.EXCEL_STRATEGY); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); assertTrue(tmp.length > 0); @@ -324,7 +326,6 @@ public class CSVParserTest extends TestC for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); assertTrue(tmp.length > 0); @@ -349,8 +350,7 @@ public class CSVParserTest extends TestC String code; for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); assertTrue(tmp.length > 0); @@ -374,7 +374,6 @@ public class CSVParserTest extends TestC for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); assertTrue(tmp.length > 0); @@ -457,6 +456,61 @@ public class CSVParserTest extends TestC } } + public void testBackslashEscaping2() throws IOException { + + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. 
+ + String code = "" + + " , , \n" // 1) + + " \t , , \n" // 2) + + " // , /, , /,\n" // 3) + + ""; + String[][] res = { + { " ", " ", " " }, // 1 + { " \t ", " ", " " }, // 2 + { " / ", " , ", " ," }, //3 + }; + + + CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true); + + CSVParser parser = new CSVParser(new StringReader(code), strategy); + String[][] tmp = parser.getAllValues(); + assertTrue(tmp.length > 0); + + if (!CSVPrinterTest.equals(res, tmp)) { + assertTrue(false); + } + + } + + + public void testDefaultStrategy() throws IOException { + + String code = "" + + "a,b\n" // 1) + + "\"\n\",\" \"\n" // 2) + + "\"\",#\n" // 2) + ; + String[][] res = { + { "a", "b" }, + { "\n", " " }, + { "", "#" }, // WARNING: TODO: this causes a hang if comments are enabled + }; + + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; + assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart()); + + CSVParser parser = new CSVParser(new StringReader(code), strategy); + String[][] tmp = parser.getAllValues(); + assertTrue(tmp.length > 0); + + if (!CSVPrinterTest.equals(res, tmp)) { + assertTrue(false); + } + } public void testUnicodeEscape() throws IOException { @@ -502,8 +556,7 @@ public class CSVParserTest extends TestC // From SANDBOX-153 public void testDelimiterIsWhitespace() throws IOException { String code = "one\ttwo\t\tfour \t five\t six"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setStrategy(CSVStrategy.TDF_STRATEGY); + TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY); assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java URL: 
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java (original) +++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java Wed Jun 16 16:12:34 2010 @@ -16,7 +16,12 @@ */ package org.apache.commons.csv; +import java.io.IOException; +import java.io.StringReader; import java.io.StringWriter; +import java.util.Arrays; +import java.util.Random; + import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -26,48 +31,161 @@ import junit.framework.TestSuite; */ public class CSVPrinterTest extends TestCase { - String lineSeparator = System.getProperty("line.separator"); + String lineSeparator = "\n"; - public void testPrinter1() { + public void testPrinter1() throws IOException { StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw); + CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); String[] line1 = {"a", "b"}; printer.println(line1); assertEquals("a,b" + lineSeparator, sw.toString()); } - public void testPrinter2() { + public void testPrinter2() throws IOException { StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw); + CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); String[] line1 = {"a,b", "b"}; printer.println(line1); assertEquals("\"a,b\",b" + lineSeparator, sw.toString()); } - public void testPrinter3() { + public void testPrinter3() throws IOException { StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw); + CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); String[] line1 = {"a, b", "b "}; printer.println(line1); assertEquals("\"a, b\",\"b \"" + lineSeparator, sw.toString()); } - public void 
testExcelPrinter1() { + public void testExcelPrinter1() throws IOException { StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw); - printer.setStrategy(CSVStrategy.EXCEL_STRATEGY); + CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY); String[] line1 = {"a", "b"}; printer.println(line1); assertEquals("a,b" + lineSeparator, sw.toString()); } - public void testExcelPrinter2() { + public void testExcelPrinter2() throws IOException { StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw); - printer.setStrategy(CSVStrategy.EXCEL_STRATEGY); + CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY); String[] line1 = {"a,b", "b"}; printer.println(line1); assertEquals("\"a,b\",b" + lineSeparator, sw.toString()); } + + + public void testRandom() throws Exception { + int iter=10000; + strategy = CSVStrategy.DEFAULT_STRATEGY; + doRandom(iter); + strategy = CSVStrategy.EXCEL_STRATEGY; + doRandom(iter); + + // Strategy for MySQL + strategy = new CSVStrategy('\t', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED,'\\',false, false, false, false); + doRandom(iter); + } + + Random r = new Random(); + CSVStrategy strategy; + + public void doRandom(int iter) throws Exception { + for (int i=0; i<iter; i++) { + doOneRandom(); + } + } + + public void doOneRandom() throws Exception { + int nLines = r.nextInt(4)+1; + int nCol = r.nextInt(3)+1; + // nLines=1;nCol=2; + String[][] lines = new String[nLines][]; + for (int i=0; i<nLines; i++) { + String[] line = new String[nCol]; + lines[i] = line; + for (int j=0; j<nCol; j++) { + line[j] = randStr(); + } + } + + StringWriter sw = new StringWriter(); + CSVPrinter printer = new CSVPrinter(sw, strategy); + + for (int i=0; i<nLines; i++) { + // for (int j=0; j<lines[i].length; j++) System.out.println("### VALUE=:" + printable(lines[i][j])); + printer.println(lines[i]); + } + + printer.flush(); + String result = sw.toString(); + // 
System.out.println("### :" + printable(result)); + + StringReader reader = new StringReader(result); + + CSVParser parser = new CSVParser(reader, strategy); + String[][] parseResult = parser.getAllValues(); + + if (!equals(lines, parseResult)) { + System.out.println("Printer output :" + printable(result)); + assertTrue(false); + } + } + + public static boolean equals(String[][] a, String[][] b) { + for (int i=0; i<a.length; i++) { + String[] linea = a[i]; + String[] lineb = b[i]; + for (int j=0; j<linea.length; j++) { + String aval = linea[j]; + String bval = lineb[j]; + if (!aval.equals(bval)) { + System.out.println("expected :" + printable(aval)); + System.out.println("got :" + printable(bval)); + return false; + } + } + } + return true; + } + + public static String printable(String s) { + StringBuffer sb = new StringBuffer(); + for (int i=0; i<s.length(); i++) { + char ch = s.charAt(i); + if (ch<=' ' || ch>=128) { + sb.append("(" + (int)ch + ")"); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + public String randStr() { + int sz = r.nextInt(20); + // sz = r.nextInt(3); + char[] buf = new char[sz]; + for (int i=0; i<sz; i++) { + // stick in special chars with greater frequency + char ch; + int what = r.nextInt(20); + switch (what) { + case 0: ch = '\r'; break; + case 1: ch = '\n'; break; + case 2: ch = '\t'; break; + case 3: ch = '\f'; break; + case 4: ch = ' '; break; + case 5: ch = ','; break; + case 6: ch = '"'; break; + case 7: ch = '\''; break; + case 8: ch = '\\'; break; + default: ch = (char)r.nextInt(300); break; + // default: ch = 'a'; break; + } + buf[i] = ch; + } + return new String(buf); + } + } Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java?rev=955284&r1=955283&r2=955284&view=diff ============================================================================== --- 
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java (original) +++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java Wed Jun 16 16:12:34 2010 @@ -35,8 +35,7 @@ public class CSVStrategyTest extends Tes // getters / setters // ====================================================== public void testGetSetCommentStart() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - CSVStrategy strategy = parser.getStrategy(); + CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); strategy.setCommentStart('#'); assertEquals(strategy.getCommentStart(), '#'); strategy.setCommentStart('!'); @@ -44,8 +43,7 @@ public class CSVStrategyTest extends Tes } public void testGetSetEncapsulator() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - CSVStrategy strategy = parser.getStrategy(); + CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); strategy.setEncapsulator('"'); assertEquals(strategy.getEncapsulator(), '"'); strategy.setEncapsulator('\''); @@ -53,8 +51,7 @@ public class CSVStrategyTest extends Tes } public void testGetSetDelimiter() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - CSVStrategy strategy = parser.getStrategy(); + CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); strategy.setDelimiter(';'); assertEquals(strategy.getDelimiter(), ';'); strategy.setDelimiter(','); @@ -64,8 +61,7 @@ public class CSVStrategyTest extends Tes } public void testSetCSVStrategy() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - CSVStrategy strategy = parser.getStrategy(); + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; // default settings assertEquals(strategy.getDelimiter(), ','); assertEquals(strategy.getEncapsulator(), '"'); @@ -74,7 +70,6 @@ public class CSVStrategyTest extends Tes assertEquals(false, strategy.getUnicodeEscapeInterpretation()); assertEquals(true, 
strategy.getIgnoreEmptyLines()); // explicit csv settings - parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); assertEquals(strategy.getDelimiter(), ','); assertEquals(strategy.getEncapsulator(), '"'); assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);