This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git


The following commit(s) were added to refs/heads/master by this push:
     new c15a06e  CSV-288 Fix for multi-char delimiter not working as expected 
(#218)
c15a06e is described below

commit c15a06eee95e227fbe5c61849dd12b7bf126016a
Author: Angus <angus...@gmail.com>
AuthorDate: Sun Feb 20 00:53:11 2022 +0800

    CSV-288 Fix for multi-char delimiter not working as expected (#218)
    
    When checking if previous token is delimiter, isDelimiter(lastChar) 
unintentionally advance the buffer pointer. Also isDelimiter(lastChar) cannot 
handle multi-char delimiter. To fix this, create a new indicator 
isLastTokenDelimiter instead of using isDelimiter(lastChar), the indicator is 
set/reset in isDelimiter()
---
 src/main/java/org/apache/commons/csv/Lexer.java    |  11 +-
 .../apache/commons/csv/issues/JiraCsv288Test.java  | 230 +++++++++++++++++++++
 2 files changed, 238 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/Lexer.java 
b/src/main/java/org/apache/commons/csv/Lexer.java
index edd9576..f424039 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -62,6 +62,8 @@ final class Lexer implements Closeable {
     private final ExtendedBufferedReader reader;
     private String firstEol;
 
+    private boolean isLastTokenDelimiter;
+
     Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
         this.reader = reader;
         this.delimiter = format.getDelimiterString().toCharArray();
@@ -124,11 +126,13 @@ final class Lexer implements Closeable {
      * @throws IOException If an I/O error occurs.
      */
     boolean isDelimiter(final int ch) throws IOException {
+        isLastTokenDelimiter = false;
         if (ch != delimiter[0]) {
             return false;
         }
         if (delimiter.length == 1) {
-          return true;
+            isLastTokenDelimiter = true;
+            return true;
         }
         reader.lookAhead(delimiterBuf);
         for (int i = 0; i < delimiterBuf.length; i++) {
@@ -137,7 +141,8 @@ final class Lexer implements Closeable {
             }
         }
         final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
-        return count != END_OF_STREAM;
+        isLastTokenDelimiter = count != END_OF_STREAM;
+        return isLastTokenDelimiter;
     }
 
     /**
@@ -243,7 +248,7 @@ final class Lexer implements Closeable {
         }
 
         // did we reach eof during the last iteration already ? EOF
-        if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) 
{
+        if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
             token.type = EOF;
             // don't set token.isReady here because no content
             return token;
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java
new file mode 100644
index 0000000..354f305
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv.issues;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.csv.CSVRecord;
+import org.junit.jupiter.api.Test;
+
+public class JiraCsv288Test {
+    @Test
+    // Before fix:
+    // expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
+    public void testParseWithDoublePipeDelimiter() throws Exception {
+        final Reader in = new StringReader("a||b||c||d||||f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("||").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Before fix:
+    // expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
+    public void testParseWithTriplePipeDelimiter() throws Exception {
+        final Reader in = new StringReader("a|||b|||c|||d||||||f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("|||").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Before fix:
+    // expected: <a,b,c,d,,f> but was: <a,b,c,d,|f>
+    public void testParseWithABADelimiter() throws Exception {
+        final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("|~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Before fix:
+    // expected: <a,b||c,d,,f> but was: <a,b||c,d,|f>
+    public void testParseWithDoublePipeDelimiterQuoted() throws Exception {
+        final Reader in = new StringReader("a||\"b||c\"||d||||f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("||").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b||c,d,,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Before fix:
+    // expected: <a,b,c,d,,f,> but was: <a,b|c,d,|f>
+    public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws 
Exception {
+        final Reader in = new StringReader("a||b||c||d||||f||");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("||").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f,", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Before fix:
+    // expected: <a,b,c,d,,f,> but was: <a,b,c,d,,f>
+    public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws 
Exception {
+        final Reader in = new StringReader("a~|b~|c~|d~|~|f~|");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f,", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+
+    public void testParseWithDoublePipeDelimiterDoubleCharValue() throws 
Exception {
+        final Reader in = new StringReader("a||bb||cc||dd||f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("||").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,bb,cc,dd,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+    public void testParseWithTwoCharDelimiter1() throws Exception {
+        final Reader in = new StringReader("a~|b~|c~|d~|~|f");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+    public void testParseWithTwoCharDelimiter2() throws Exception {
+        final Reader in = new StringReader("a~|b~|c~|d~|~|f~");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f~", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+    public void testParseWithTwoCharDelimiter3() throws Exception {
+        final Reader in = new StringReader("a~|b~|c~|d~|~|f|");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f|", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+    public void testParseWithTwoCharDelimiter4() throws Exception {
+        final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("~|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString());
+            }
+        }
+    }
+
+    @Test
+    // Regression, already passed before fix
+    public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws 
Exception {
+        final Reader in = new StringReader("a|b|c|d||f|");
+        StringBuilder stringBuilder = new StringBuilder();
+        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, 
CSVFormat.EXCEL);
+                CSVParser csvParser = CSVParser.parse(in, 
CSVFormat.Builder.create().setDelimiter("|").build())) {
+            for (CSVRecord csvRecord : csvParser) {
+                for (int i = 0; i < csvRecord.size(); i++) {
+                    csvPrinter.print(csvRecord.get(i));
+                }
+                assertEquals("a,b,c,d,,f,", stringBuilder.toString());
+            }
+        }
+    }
+}
\ No newline at end of file

Reply via email to