[commons-csv] branch master updated: [CSV-265] Update buffer position when reading line comment (#120)

ggregory Wed, 21 Jul 2021 07:38:05 -0700

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git



The following commit(s) were added to refs/heads/master by this push:
     new a4c6037  [CSV-265] Update buffer position when reading line comment (#120)
a4c6037 is described below

commit a4c6037d0ccfc5b7176d6c522fcc0ab38b9bdd28
Author: king-tyler <8304181+king-ty...@users.noreply.github.com>
AuthorDate: Wed Jul 21 09:37:53 2021 -0500

    [CSV-265] Update buffer position when reading line comment (#120)
    
    * [CSV-265] Add JiraCsv265Test
    
    * [CSV-265] Update buffer position when reading line comment
    
    * Update JiraCsv265Test.java
    
    File should end in a new line.
    
    Co-authored-by: Tyler King <tylerking...@hotmail.com>
    Co-authored-by: Gary Gregory <garydgreg...@users.noreply.github.com>
---
 .../apache/commons/csv/ExtendedBufferedReader.java | 34 +++++----
 .../apache/commons/csv/issues/JiraCsv265Test.java  | 88 ++++++++++++++++++++++
 2 files changed, 109 insertions(+), 13 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 4b3fb48..1067dd2 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -198,29 +198,37 @@ final class ExtendedBufferedReader extends BufferedReader {
     }
 
     /**
-     * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
-     * when processing a comment, otherwise information can be lost.
+     * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
+     * comment, otherwise information can be lost.
      * <p>
-     * Increments {@link #eolCounter}.
+     * Increments {@link #eolCounter} and updates {@link #position}.
      * </p>
      * <p>
-     * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF.
+     * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character.
      * </p>
      *
      * @return the line that was read, or null if reached EOF.
      */
     @Override
     public String readLine() throws IOException {
-        final String line = super.readLine();
-
-        if (line != null) {
-            lastChar = LF; // needed for detecting start of line
-            eolCounter++;
-        } else {
-            lastChar = END_OF_STREAM;
+        if (lookAhead() == END_OF_STREAM) {
+            return null;
         }
-
-        return line;
+        final StringBuilder buffer = new StringBuilder();
+        while (true) {
+            final int current = read();
+            if (current == CR) {
+                final int next = lookAhead();
+                if (next == LF) {
+                    read();
+                }
+            }
+            if (current == END_OF_STREAM || current == LF || current == CR) {
+                break;
+            }
+            buffer.append((char) current);
+        }
+        return buffer.toString();
     }
 
 }
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java
new file mode 100644
index 0000000..f62b866
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv.issues;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Iterator;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment.
+ */
+public class JiraCsv265Test {
+
+    @Test
+    public void testCharacterPositionWithComments() throws IOException {
+        // @formatter:off
+        final String csv = "# Comment1\n"
+                         + "Header1,Header2\n"
+                         + "# Comment2\n"
+                         + "Value1,Value2\n"
+                         + "# Comment3\n"
+                         + "Value3,Value4\n"
+                         + "# Comment4\n";
+        final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
+            .setCommentMarker('#')
+            .setHeader()
+            .setSkipHeaderRecord(true)
+            .build();
+        // @formatter:on
+        try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) {
+            final Iterator<CSVRecord> itr = parser.iterator();
+            final CSVRecord record1 = itr.next();
+            assertEquals(csv.indexOf("# Comment2"), 
record1.getCharacterPosition());
+            final CSVRecord record2 = itr.next();
+            assertEquals(csv.indexOf("# Comment3"), 
record2.getCharacterPosition());
+        }
+    }
+
+    @Test
+    public void testCharacterPositionWithCommentsSpanningMultipleLines() 
throws IOException {
+        // @formatter:off
+        final String csv = "# Comment1\n"
+                         + "# Comment2\n"
+                         + "Header1,Header2\n"
+                         + "# Comment3\n"
+                         + "# Comment4\n"
+                         + "Value1,Value2\n"
+                         + "# Comment5\n"
+                         + "# Comment6\n"
+                         + "Value3,Value4";
+        final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
+            .setCommentMarker('#')
+            .setHeader()
+            .setSkipHeaderRecord(true)
+            .build();
+        // @formatter:on
+        try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) {
+            final Iterator<CSVRecord> itr = parser.iterator();
+            final CSVRecord record1 = itr.next();
+            assertEquals(csv.indexOf("# Comment3"), 
record1.getCharacterPosition());
+            final CSVRecord record2 = itr.next();
+            assertEquals(csv.indexOf("# Comment5"), 
record2.getCharacterPosition());
+        }
+    }
+
+}

[commons-csv] branch master updated: [CSV-265] Update buffer position when reading line comment (#120)

Reply via email to