CSVParserTest.java

yonik Sun, 06 Jan 2008 07:13:41 -0800

Author: yonik
Date: Sun Jan  6 07:13:09 2008
New Revision: 609327

URL: http://svn.apache.org/viewvc?rev=609327&view=rev
Log:
SANDBOX-206: fix whitespace handling w/ escaping, add an option to not remove 
trailing whitespace


Modified:
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
    commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java

Modified: 
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java 
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java 
Sun Jan  6 07:13:09 2008
@@ -399,47 +399,39 @@
    * @throws IOException on stream access error
    */
   private Token simpleTokenLexer(Token tkn, int c) throws IOException {
-    wsBuf.clear();
     for (;;) {
       if (isEndOfLine(c)) {
         // end of record
         tkn.type = TT_EORECORD;
         tkn.isReady = true;
-        return tkn;
+        break;
       } else if (isEndOfFile(c)) {
         // end of file
         tkn.type = TT_EOF;
         tkn.isReady = true;
-        return tkn;
+        break;
       } else if (c == strategy.getDelimiter()) {
         // end of token
         tkn.type = TT_TOKEN;
         tkn.isReady = true;
-        return tkn;
+        break;
       } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && 
in.lookAhead() == 'u') {
         // interpret unicode escaped chars (like \u0070 -> p)
         tkn.content.append((char) unicodeEscapeLexer(c));
-      } else if (isWhitespace(c)) {
-        // gather whitespaces 
-        // (as long as they are not at the beginning of a token)
-        if (tkn.content.length() > 0) {
-          wsBuf.append((char) c);
-        }
       } else if (c == strategy.getEscape()) {
         tkn.content.append((char)readEscape(c));
       } else {
-        // prepend whitespaces (if we have)
-        if (wsBuf.length() > 0) {
-          tkn.content.append(wsBuf);
-          wsBuf.clear();
-        }
         tkn.content.append((char) c);
       }
-      // get the next char
-      if (!tkn.isReady) {
-        c = in.read();
-      }
+      
+      c = in.read();
+    }
+
+    if (strategy.getIgnoreTrailingWhitespaces()) {
+      tkn.content.trimTrailingWhitespace();
     }
+
+    return tkn;
   }
   
   

Modified: 
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java 
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java 
Sun Jan  6 07:13:09 2008
@@ -30,6 +30,7 @@
     private char commentStart;
     private char escape;
     private boolean ignoreLeadingWhitespaces;
+    private boolean ignoreTrailingWhitespaces;
     private boolean interpretUnicodeEscapes;
     private boolean ignoreEmptyLines;
 
@@ -40,9 +41,9 @@
     public static char COMMENTS_DISABLED       = (char)-2;
     public static char ESCAPE_DISABLED         = (char)-2;
 
-    public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, true,  false, true);
-    public static CSVStrategy EXCEL_STRATEGY   = new CSVStrategy(',', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false);
-    public static CSVStrategy TDF_STRATEGY     = new CSVStrategy('\t', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, true,  false, true);
+    public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, true, true,  false, true);
+    public static CSVStrategy EXCEL_STRATEGY   = new CSVStrategy(',', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
+    public static CSVStrategy TDF_STRATEGY     = new CSVStrategy('\t', '"', 
COMMENTS_DISABLED, ESCAPE_DISABLED, true, true,  false, true);
 
 
     public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
@@ -67,6 +68,7 @@
         char commentStart,
         char escape,
         boolean ignoreLeadingWhitespace, 
+        boolean ignoreTrailingWhitespace, 
         boolean interpretUnicodeEscapes,
         boolean ignoreEmptyLines) 
     {
@@ -75,6 +77,7 @@
         setCommentStart(commentStart);
         setEscape(escape);
         setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
+        setIgnoreTrailingWhitespaces(ignoreTrailingWhitespace);
         setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
         setIgnoreEmptyLines(ignoreEmptyLines);
     }
@@ -88,7 +91,7 @@
         boolean interpretUnicodeEscapes,
         boolean ignoreEmptyLines)
     {
-        
this(delimiter,encapsulator,commentStart,CSVStrategy.ESCAPE_DISABLED,ignoreLeadingWhitespace,interpretUnicodeEscapes,ignoreEmptyLines);
+        
this(delimiter,encapsulator,commentStart,CSVStrategy.ESCAPE_DISABLED,ignoreLeadingWhitespace,true,interpretUnicodeEscapes,ignoreEmptyLines);
     }
 
 
@@ -107,6 +110,9 @@
 
     public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) 
{ this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; }
     public boolean getIgnoreLeadingWhitespaces() { return 
this.ignoreLeadingWhitespaces; }
+
+    public void setIgnoreTrailingWhitespaces(boolean 
ignoreTrailingWhitespaces) { this.ignoreTrailingWhitespaces = 
ignoreTrailingWhitespaces; }
+    public boolean getIgnoreTrailingWhitespaces() { return 
this.ignoreTrailingWhitespaces; }
 
     public void setUnicodeEscapeInterpretation(boolean 
interpretUnicodeEscapes) { this.interpretUnicodeEscapes = 
interpretUnicodeEscapes; }
     public boolean getUnicodeEscapeInterpretation() { return 
this.interpretUnicodeEscapes; }

Modified: 
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java 
(original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CharBuffer.java 
Sun Jan  6 07:13:09 2008
@@ -24,7 +24,7 @@
  * grows as necessary.
  * This class is not thread safe.
  * 
- * @author Ortwin Glück
+ * @author Ortwin Glück
  */
 public class CharBuffer {
     private char[] c;
@@ -65,7 +65,7 @@
     public int length() {
         return length;
     }
-    
+
     /**
      * Returns the current capacity of the buffer.
      * @return the maximum number of characters that can be stored in this 
buffer without
@@ -74,6 +74,7 @@
     public int capacity() {
         return c.length;
     }
+
     
     /**
      * Appends the contents of <code>cb</code> to the end of this CharBuffer.
@@ -142,6 +143,15 @@
         c = newc;
     }
 
+   /**
+    * Removes trailing whitespace.
+    */
+    public void trimTrailingWhitespace() {
+      while (length>0 && Character.isWhitespace(c[length-1])) {
+        length--;
+      }
+    }
+
     /**
      * Returns the contents of the buffer as a char[]. The returned array may
      * be the internal array of the buffer, so the caller must take care when
@@ -156,7 +166,14 @@
         System.arraycopy(c, 0, chars, 0, length);
         return chars;
     }
-    
+
+   /**
+    * Returns the character at the specified position.
+    */
+    public char charAt(int pos) {
+      return c[pos];
+   }
+
     /**
      * Converts the contents of the buffer into a StringBuffer.
      * This method involves copying the new data once!

Modified: 
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java?rev=609327&r1=609326&r2=609327&view=diff
==============================================================================
--- 
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java 
(original)
+++ 
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java 
Sun Jan  6 07:13:09 2008
@@ -485,6 +485,8 @@
       + "/,,/,\n"       // 5) separator escaped
       + "//,//\n"       // 6) escape escaped
       + "'//','//'\n"   // 7) escape escaped in encapsulation
+      + "   8   ,   \"quoted \"\" /\" // string\"   \n"     // don't eat spaces
+      + "9,   /\n   \n"  // escaped newline
       + "";
     String[][] res = {
         { "one", "two", "three" }, // 0
@@ -495,10 +497,12 @@
         { ",", "," },              // 5
         { "/", "/" },              // 6
         { "/", "/" },              // 7
+        { "   8   ", "   \"quoted \"\" \" / string\"   " },
+        { "9", "   \n   " },
       };
 
 
-    CSVStrategy strategy = new 
CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',true,true,true);
+    CSVStrategy strategy = new 
CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
 
     CSVParser parser = new CSVParser(new StringReader(code), strategy);
     System.out.println("---------\n" + code + "\n-------------");
@@ -511,6 +515,7 @@
       assertTrue(Arrays.equals(res[i], tmp[i]));
     }
   }
+
 
 
     public void testUnicodeEscape() throws IOException {

svn commit: r609327 - in /commons/sandbox/csv/trunk/src: java/org/apache/commons/csv/CSVParser.java java/org/apache/commons/csv/CSVStrategy.java java/org/apache/commons/csv/CharBuffer.java test/org/apache/commons/csv/CSVParserTest.java

Reply via email to