This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ac702b  CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162)
3ac702b is described below

commit 3ac702b190fd04c56118cb03aa87577f0a6a86f7
Author: belugabehr <12578579+belugab...@users.noreply.github.com>
AuthorDate: Thu Jul 15 09:41:22 2021 -0400

    CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162)
    
    * CSV-278: Reuse Buffers in Lexer for Delimiter Detection
    
    * Remove erroneous tab character
    
    * Reduce change set with fewer formatting changes
    
    * Reduce change set with fewer formatting changes
---
 .../apache/commons/csv/ExtendedBufferedReader.java | 15 +++++++++++++
 src/main/java/org/apache/commons/csv/Lexer.java    | 26 ++++++++++++----------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 63efb64..4b3fb48 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -132,6 +132,21 @@ final class ExtendedBufferedReader extends BufferedReader {
      */
     char[] lookAhead(final int n) throws IOException {
         final char[] buf = new char[n];
+        return lookAhead(buf);
+    }
+
+    /**
+     * Populates the buffer with the next {@code buf.length} characters in the
+     * current reader without consuming them. The next call to {@link #read()} will
+     * still return the next value. This doesn't affect line number or last
+     * character.
+     *
+     * @param buf the buffer to fill for the look ahead.
+     * @return the buffer itself
+     * @throws IOException If an I/O error occurs
+     */
+    char[] lookAhead(final char[] buf) throws IOException {
+        final int n = buf.length;
         super.mark(n);
         super.read(buf, 0, n);
         super.reset();
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index cfd64fc..a8afc50 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -49,6 +49,8 @@ final class Lexer implements Closeable {
     private static final char DISABLED = '\ufffe';
 
     private final char[] delimiter;
+    private final char[] delimiterBuf;
+    private final char[] escapeDelimiterBuf;
     private final char escape;
     private final char quoteChar;
     private final char commentStart;
@@ -68,6 +70,8 @@ final class Lexer implements Closeable {
         this.commentStart = mapNullToDisabled(format.getCommentMarker());
         this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
         this.ignoreEmptyLines = format.getIgnoreEmptyLines();
+        this.delimiterBuf = new char[delimiter.length - 1];
+        this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
     }
 
     /**
@@ -112,7 +116,7 @@ final class Lexer implements Closeable {
     }
 
     /**
-     * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)}
+     * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
      *
      * @param ch
      *             the current character.
@@ -126,14 +130,13 @@ final class Lexer implements Closeable {
         if (delimiter.length == 1) {
           return true;
         }
-        final int len = delimiter.length - 1;
-        final char[] buf = reader.lookAhead(len);
-        for (int i = 0; i < len; i++) {
-            if (buf[i] != delimiter[i+1]) {
+        reader.lookAhead(delimiterBuf);
+        for (int i = 0; i < delimiterBuf.length; i++) {
+            if (delimiterBuf[i] != delimiter[i+1]) {
                 return false;
             }
         }
-        final int count = reader.read(buf, 0, len);
+        final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
         return count != END_OF_STREAM;
     }
 
@@ -156,7 +159,7 @@ final class Lexer implements Closeable {
     }
 
     /**
-     * Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}.
+     * Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
      *
      * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
      *
@@ -164,17 +167,16 @@ final class Lexer implements Closeable {
      * @throws IOException If an I/O error occurs.
      */
     boolean isEscapeDelimiter() throws IOException {
-        final int len = 2 * delimiter.length - 1;
-        final char[] buf = reader.lookAhead(len);
-        if (buf[0] != delimiter[0]) {
+        reader.lookAhead(escapeDelimiterBuf);
+        if (escapeDelimiterBuf[0] != delimiter[0]) {
             return false;
         }
         for (int i = 1; i < delimiter.length; i++) {
-            if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) {
+            if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
                 return false;
             }
         }
-        final int count = reader.read(buf, 0, len);
+        final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
         return count != END_OF_STREAM;
     }
 

Reply via email to