(commons-csv) 02/03: Sort members

ggregory Thu, 02 Jan 2025 12:15:48 -0800

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git


commit 76981db68af50e0475a2d5d0fcc6bdf87efb18f1
Author: Gary Gregory <garydgreg...@gmail.com>
AuthorDate: Thu Jan 2 15:14:13 2025 -0500

    Sort members
---
 .../java/org/apache/commons/csv/CSVParser.java     |  24 ++--
 .../java/org/apache/commons/csv/CSVRecord.java     |  20 +--
 .../apache/commons/csv/ExtendedBufferedReader.java | 108 ++++++++--------
 src/main/java/org/apache/commons/csv/Lexer.java    |  16 +--
 .../java/org/apache/commons/csv/CSVParserTest.java | 140 ++++++++++-----------
 5 files changed, 154 insertions(+), 154 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java 
b/src/main/java/org/apache/commons/csv/CSVParser.java
index 07028ea7..a7067657 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -181,6 +181,18 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
             return asThis();
         }
 
+        /**
+         * Sets whether to enable byte tracking for the parser.
+         *
+         * @param enableByteTracking {@code true} to enable byte tracking; 
{@code false} to disable it.
+         * @return this instance.
+         * @since 1.13.0
+         */
+        public Builder setEnableByteTracking(final boolean enableByteTracking) 
{
+            this.enableByteTracking = enableByteTracking;
+            return asThis();
+        }
+
         /**
          * Sets the CSV format. A copy of the given format is kept.
          *
@@ -203,18 +215,6 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
             return asThis();
         }
 
-        /**
-         * Sets whether to enable byte tracking for the parser.
-         *
-         * @param enableByteTracking {@code true} to enable byte tracking; 
{@code false} to disable it.
-         * @return this instance.
-         * @since 1.13.0
-         */
-        public Builder setEnableByteTracking(final boolean enableByteTracking) 
{
-            this.enableByteTracking = enableByteTracking;
-            return asThis();
-        }
-
     }
 
     final class CSVRecordIterator implements Iterator<CSVRecord> {
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java 
b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 284220c3..689cd0a2 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -141,16 +141,6 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
         }
     }
 
-    /**
-     * Returns the start position of this record as a character position in 
the source stream. This may or may not
-     * correspond to the byte position depending on the character set.
-     *
-     * @return the position of this record in the source stream.
-     */
-    public long getCharacterPosition() {
-        return characterPosition;
-    }
-
     /**
      * Returns the starting position of this record in the source stream, 
measured in bytes.
      *
@@ -161,6 +151,16 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
         return bytePosition;
     }
 
+    /**
+     * Returns the start position of this record as a character position in 
the source stream. This may or may not
+     * correspond to the byte position depending on the character set.
+     *
+     * @return the position of this record in the source stream.
+     */
+    public long getCharacterPosition() {
+        return characterPosition;
+    }
+
     /**
      * Returns the comment for this record, if any.
      * Note that comments are attached to the following record.
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java 
b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 6043ccaf..31890db8 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -98,6 +98,60 @@ final class ExtendedBufferedReader extends 
UnsynchronizedBufferedReader {
         super.close();
     }
 
+    /**
+     * Gets the number of bytes read by the reader.
+     *
+     * @return the number of bytes read by the reader
+     */
+    long getBytesRead() {
+        return this.bytesRead;
+    }
+
+    /**
+     * Gets the byte length of the given character based on the original 
Unicode
+     * specification, which defined characters as fixed-width 16-bit entities.
+     * <p>
+     * The Unicode characters are divided into two main ranges:
+     * <ul>
+     *   <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
+     *     <ul>
+     *       <li>Represented using a single 16-bit {@code char}.</li>
+     *       <li>Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte 
characters.</li>
+     *     </ul>
+     *   </li>
+     *   <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
+     *     <ul>
+     *       <li>Represented as a pair of {@code char}s:</li>
+     *       <li>The first {@code char} is from the high-surrogates range 
(\uD800-\uDBFF).</li>
+     *       <li>The second {@code char} is from the low-surrogates range 
(\uDC00-\uDFFF).</li>
+     *       <li>Includes UTF-8 encodings of some 3-byte characters and all 
4-byte characters.</li>
+     *     </ul>
+     *   </li>
+     * </ul>
+     *
+     * @param current the current character to process.
+     * @return the byte length of the character.
+     * @throws CharacterCodingException if the character cannot be encoded.
+     */
+    private int getEncodedCharLength(int current) throws 
CharacterCodingException {
+        final char cChar = (char) current;
+        final char lChar = (char) lastChar;
+        if (!Character.isSurrogate(cChar)) {
+            return encoder.encode(
+                CharBuffer.wrap(new char[] {cChar})).limit();
+        } else {
+            if (Character.isHighSurrogate(cChar)) {
+                // Move on to the next char (low surrogate)
+                return 0;
+            } else if (Character.isSurrogatePair(lChar, cChar)) {
+                return encoder.encode(
+                    CharBuffer.wrap(new char[] {lChar, cChar})).limit();
+            } else {
+                throw new CharacterCodingException();
+            }
+        }
+    }
+
     /**
      * Returns the last character that was read as an integer (0 to 65535). 
This will be the last character returned by
      * any of the read methods. This will not include a character read using 
the {@link #peek()} method. If no
@@ -156,51 +210,6 @@ final class ExtendedBufferedReader extends 
UnsynchronizedBufferedReader {
         return lastChar;
     }
 
-    /**
-     * Gets the byte length of the given character based on the the original 
Unicode
-     * specification, which defined characters as fixed-width 16-bit entities.
-     * <p>
-     * The Unicode characters are divided into two main ranges:
-     * <ul>
-     *   <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
-     *     <ul>
-     *       <li>Represented using a single 16-bit {@code char}.</li>
-     *       <li>Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte 
characters.</li>
-     *     </ul>
-     *   </li>
-     *   <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
-     *     <ul>
-     *       <li>Represented as a pair of {@code char}s:</li>
-     *       <li>The first {@code char} is from the high-surrogates range 
(\uD800-\uDBFF).</li>
-     *       <li>The second {@code char} is from the low-surrogates range 
(\uDC00-\uDFFF).</li>
-     *       <li>Includes UTF-8 encodings of some 3-byte characters and all 
4-byte characters.</li>
-     *     </ul>
-     *   </li>
-     * </ul>
-     *
-     * @param current the current character to process.
-     * @return the byte length of the character.
-     * @throws CharacterCodingException if the character cannot be encoded.
-     */
-    private int getEncodedCharLength(int current) throws 
CharacterCodingException {
-        final char cChar = (char) current;
-        final char lChar = (char) lastChar;
-        if (!Character.isSurrogate(cChar)) {
-            return encoder.encode(
-                CharBuffer.wrap(new char[] {cChar})).limit();
-        } else {
-            if (Character.isHighSurrogate(cChar)) {
-                // Move on to the next char (low surrogate)
-                return 0;
-            } else if (Character.isSurrogatePair(lChar, cChar)) {
-                return encoder.encode(
-                    CharBuffer.wrap(new char[] {lChar, cChar})).limit();
-            } else {
-                throw new CharacterCodingException();
-            }
-        }
-    }
-
     @Override
     public int read(final char[] buf, final int offset, final int length) 
throws IOException {
         if (length == 0) {
@@ -269,13 +278,4 @@ final class ExtendedBufferedReader extends 
UnsynchronizedBufferedReader {
         super.reset();
     }
 
-    /**
-     * Gets the number of bytes read by the reader.
-     *
-     * @return the number of bytes read by the read
-     */
-    long getBytesRead() {
-        return this.bytesRead;
-    }
-
 }
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java 
b/src/main/java/org/apache/commons/csv/Lexer.java
index 2e7d2d04..2e9e7137 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -97,21 +97,21 @@ final class Lexer implements Closeable {
     }
 
     /**
-     * Returns the current character position
+     * Gets the number of bytes read
      *
-     * @return the current character position
+     * @return the number of bytes read
      */
-    long getCharacterPosition() {
-        return reader.getPosition();
+    long getBytesRead() {
+        return reader.getBytesRead();
     }
 
     /**
-     * Gets the number of bytes read
+     * Returns the current character position
      *
-     * @return the number of bytes read
+     * @return the current character position
      */
-    long getBytesRead() {
-        return reader.getBytesRead();
+    long getCharacterPosition() {
+        return reader.getPosition();
     }
 
     /**
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java 
b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index c42a3c25..2f508b36 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -703,76 +703,6 @@ public class CSVParserTest {
         }
     }
 
-    @Test
-    public void testGetRecordThreeBytesRead() throws Exception {
-        final String code = "id,date,val5,val4\n" +
-            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
-            "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
-            "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
-        final CSVFormat format = CSVFormat.Builder.create()
-            .setDelimiter(',')
-            .setQuote('\'')
-            .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new 
StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()
 ) {
-            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
-
-            assertEquals(0, parser.getRecordNumber());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(1, record.getRecordNumber());
-            assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(2, record.getRecordNumber());
-            assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(3, record.getRecordNumber());
-            assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 95);
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(4, record.getRecordNumber());
-            assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 154);
-        }
-    }
-
-    @Test
-    public void testGetRecordFourBytesRead() throws Exception {
-        final String code = "id,a,b,c\n" +
-            "1,😊,🤔,😂\n" +
-            "2,😊,🤔,😂\n" +
-            "3,😊,🤔,😂\n";
-        final CSVFormat format = CSVFormat.Builder.create()
-            .setDelimiter(',')
-            .setQuote('\'')
-            .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new 
StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get())
 {
-            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
-
-            assertEquals(0, parser.getRecordNumber());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(1, record.getRecordNumber());
-            assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(2, record.getRecordNumber());
-            assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(3, record.getRecordNumber());
-            assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 26);
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(4, record.getRecordNumber());
-            assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 43);
-        }
-    }
-
     @Test
     public void testGetHeaderMap() throws Exception {
         try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", 
CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
@@ -878,6 +808,40 @@ public class CSVParserTest {
         }
     }
 
+    @Test
+    public void testGetRecordFourBytesRead() throws Exception {
+        final String code = "id,a,b,c\n" +
+            "1,😊,🤔,😂\n" +
+            "2,😊,🤔,😂\n" +
+            "3,😊,🤔,😂\n";
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new 
StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get())
 {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 26);
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 43);
+        }
+    }
+
     @Test
     public void testGetRecordNumberWithCR() throws Exception {
         validateRecordNumbers(String.valueOf(CR));
@@ -923,6 +887,42 @@ public class CSVParserTest {
 
     }
 
+    @Test
+    public void testGetRecordThreeBytesRead() throws Exception {
+        final String code = "id,date,val5,val4\n" +
+            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
+            "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
+            "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new 
StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()
 ) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 
record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 95);
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 154);
+        }
+    }
+
     @Test
     public void testGetRecordWithMultiLineValues() throws Exception {
         try (CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF 
+ "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",

(commons-csv) 02/03: Sort members

Reply via email to