Author: yonik
Date: Sun Jan 6 07:13:09 2008
New Revision: 609327
URL: http://svn.apache.org/viewvc?rev=609327&view=rev
Log:
SANDBOX-206: fix whitespace handling w/ escaping, add an option to not remove
trailing whitespace
Modified:
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
Modified:
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
Sun Jan 6 07:13:09 2008
@@ -399,47 +399,39 @@
* @throws IOException on stream access error
*/
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
- wsBuf.clear();
for (;;) {
if (isEndOfLine(c)) {
// end of record
tkn.type = TT_EORECORD;
tkn.isReady = true;
- return tkn;
+ break;
} else if (isEndOfFile(c)) {
// end of file
tkn.type = TT_EOF;
tkn.isReady = true;
- return tkn;
+ break;
} else if (c == strategy.getDelimiter()) {
// end of token
tkn.type = TT_TOKEN;
tkn.isReady = true;
- return tkn;
+ break;
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() &&
in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p)
tkn.content.append((char) unicodeEscapeLexer(c));
- } else if (isWhitespace(c)) {
- // gather whitespaces
- // (as long as they are not at the beginning of a token)
- if (tkn.content.length() > 0) {
- wsBuf.append((char) c);
- }
} else if (c == strategy.getEscape()) {
tkn.content.append((char)readEscape(c));
} else {
- // prepend whitespaces (if we have)
- if (wsBuf.length() > 0) {
- tkn.content.append(wsBuf);
- wsBuf.clear();
- }
tkn.content.append((char) c);
}
- // get the next char
- if (!tkn.isReady) {
- c = in.read();
- }
+
+ c = in.read();
+ }
+
+ if (strategy.getIgnoreTrailingWhitespaces()) {
+ tkn.content.trimTrailingWhitespace();
}
+
+ return tkn;
}
Modified:
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
Sun Jan 6 07:13:09 2008
@@ -30,6 +30,7 @@
private char commentStart;
private char escape;
private boolean ignoreLeadingWhitespaces;
+ private boolean ignoreTrailingWhitespaces;
private boolean interpretUnicodeEscapes;
private boolean ignoreEmptyLines;
@@ -40,9 +41,9 @@
public static char COMMENTS_DISABLED = (char)-2;
public static char ESCAPE_DISABLED = (char)-2;
- public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, true, false, true);
- public static CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false);
- public static CSVStrategy TDF_STRATEGY = new CSVStrategy(' ', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, true, false, true);
+ public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
+ public static CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
+ public static CSVStrategy TDF_STRATEGY = new CSVStrategy(' ', '"',
COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
@@ -67,6 +68,7 @@
char commentStart,
char escape,
boolean ignoreLeadingWhitespace,
+ boolean ignoreTrailingWhitespace,
boolean interpretUnicodeEscapes,
boolean ignoreEmptyLines)
{
@@ -75,6 +77,7 @@
setCommentStart(commentStart);
setEscape(escape);
setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
+ setIgnoreTrailingWhitespaces(ignoreTrailingWhitespace);
setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
setIgnoreEmptyLines(ignoreEmptyLines);
}
@@ -88,7 +91,7 @@
boolean interpretUnicodeEscapes,
boolean ignoreEmptyLines)
{
-
this(delimiter,encapsulator,commentStart,CSVStrategy.ESCAPE_DISABLED,ignoreLeadingWhitespace,interpretUnicodeEscapes,ignoreEmptyLines);
+
this(delimiter,encapsulator,commentStart,CSVStrategy.ESCAPE_DISABLED,ignoreLeadingWhitespace,true,interpretUnicodeEscapes,ignoreEmptyLines);
}
@@ -107,6 +110,9 @@
public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces)
{ this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; }
public boolean getIgnoreLeadingWhitespaces() { return
this.ignoreLeadingWhitespaces; }
+
+ public void setIgnoreTrailingWhitespaces(boolean
ignoreTrailingWhitespaces) { this.ignoreTrailingWhitespaces =
ignoreTrailingWhitespaces; }
+ public boolean getIgnoreTrailingWhitespaces() { return
this.ignoreTrailingWhitespaces; }
public void setUnicodeEscapeInterpretation(boolean
interpretUnicodeEscapes) { this.interpretUnicodeEscapes =
interpretUnicodeEscapes; }
public boolean getUnicodeEscapeInterpretation() { return
this.interpretUnicodeEscapes; }
Modified:
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
Sun Jan 6 07:13:09 2008
@@ -24,7 +24,7 @@
* grows as necessary.
* This class is not thread safe.
*
- * @author Ortwin Glück
+ * @author Ortwin Glück
*/
public class CharBuffer {
private char[] c;
@@ -65,7 +65,7 @@
public int length() {
return length;
}
-
+
/**
* Returns the current capacity of the buffer.
* @return the maximum number of characters that can be stored in this
buffer without
@@ -74,6 +74,7 @@
public int capacity() {
return c.length;
}
+
/**
* Appends the contents of <code>cb</code> to the end of this CharBuffer.
@@ -142,6 +143,15 @@
c = newc;
}
+ /**
+ * Removes trailing whitespace.
+ */
+ public void trimTrailingWhitespace() {
+ while (length>0 && Character.isWhitespace(c[length-1])) {
+ length--;
+ }
+ }
+
/**
* Returns the contents of the buffer as a char[]. The returned array may
* be the internal array of the buffer, so the caller must take care when
@@ -156,7 +166,14 @@
System.arraycopy(c, 0, chars, 0, length);
return chars;
}
-
+
+ /**
+ * Returns the character at the specified position.
+ */
+ public char charAt(int pos) {
+ return c[pos];
+ }
+
/**
* Converts the contents of the buffer into a StringBuffer.
* This method involves copying the new data once!
Modified:
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
---
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
(original)
+++
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
Sun Jan 6 07:13:09 2008
@@ -485,6 +485,8 @@
+ "/,,/,\n" // 5) separator escaped
+ "//,//\n" // 6) escape escaped
+ "'//','//'\n" // 7) escape escaped in encapsulation
+ + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
+ + "9, /\n \n" // escaped newline
+ "";
String[][] res = {
{ "one", "two", "three" }, // 0
@@ -495,10 +497,12 @@
{ ",", "," }, // 5
{ "/", "/" }, // 6
{ "/", "/" }, // 7
+ { " 8 ", " \"quoted \"\" \" / string\" " },
+ { "9", " \n " },
};
- CSVStrategy strategy = new
CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',true,true,true);
+ CSVStrategy strategy = new
CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
CSVParser parser = new CSVParser(new StringReader(code), strategy);
System.out.println("---------\n" + code + "\n-------------");
@@ -511,6 +515,7 @@
assertTrue(Arrays.equals(res[i], tmp[i]));
}
}
+
public void testUnicodeEscape() throws IOException {