[commons-csv] branch master updated: [CSV-239] Cannot get headers in column order from CSVRecord.

ggregory Sun, 19 May 2019 07:26:45 -0700

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git



The following commit(s) were added to refs/heads/master by this push:
     new 788f2aa  [CSV-239] Cannot get headers in column order from CSVRecord.
788f2aa is described below

commit 788f2aaa7aff361416e11fba5df7cfc31060cbb1
Author: Gary Gregory <gardgreg...@gmail.com>
AuthorDate: Sun May 19 10:26:27 2019 -0400

    [CSV-239] Cannot get headers in column order from CSVRecord.
---
 src/changes/changes.xml                            |   1 +
 .../java/org/apache/commons/csv/CSVParser.java     | 219 +++++++++++----------
 .../java/org/apache/commons/csv/CSVRecord.java     |  89 ++++++---
 .../java/org/apache/commons/csv/CSVRecordTest.java |  31 ++-
 4 files changed, 196 insertions(+), 144 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ce721ef..2fa9369 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@
       <action issue="CSV-234" type="add" dev="ggregory" due-to="Roberto 
Benedetti, Gary Gregory">Add support for java.sql.Clob.</action>
       <action issue="CSV-237" type="update" dev="ggregory" due-to="Gary 
Gregory">Update to Java 8.</action>
       <action issue="CSV-238" type="fix" dev="ggregory" due-to="Stephen 
Olander-Waters">Escape quotes in CLOBs #39.</action>
+      <action issue="CSV-239" type="add" dev="ggregory" due-to="Gary Gregory, 
Dave Moten">Cannot get headers in column order from CSVRecord.</action>
       <action                 type="update" dev="ggregory" due-to="Gary 
Gregory">Update tests from H2 1.4.198 to 1.4.199.</action>
     </release>
     <release version="1.6" date="2018-09-22" description="Feature and bug fix 
release (Java 7)">
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java 
b/src/main/java/org/apache/commons/csv/CSVParser.java
index a3decdf..6eb97f8 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -39,6 +39,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.TreeMap;
+import java.util.stream.Collectors;
 
 /**
  * Parses CSV files according to the specified format.
@@ -133,6 +134,61 @@ import java.util.TreeMap;
  */
 public final class CSVParser implements Iterable<CSVRecord>, Closeable {
 
+    class CSVRecordIterator implements Iterator<CSVRecord> {
+        private CSVRecord current;
+
+        private CSVRecord getNextRecord() {
+            try {
+                return CSVParser.this.nextRecord();
+            } catch (final IOException e) {
+                throw new IllegalStateException(
+                        e.getClass().getSimpleName() + " reading next record: 
" + e.toString(), e);
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            if (CSVParser.this.isClosed()) {
+                return false;
+            }
+            if (this.current == null) {
+                this.current = this.getNextRecord();
+            }
+
+            return this.current != null;
+        }
+
+        @Override
+        public CSVRecord next() {
+            if (CSVParser.this.isClosed()) {
+                throw new NoSuchElementException("CSVParser has been closed");
+            }
+            CSVRecord next = this.current;
+            this.current = null;
+
+            if (next == null) {
+                // hasNext() wasn't called before
+                next = this.getNextRecord();
+                if (next == null) {
+                    throw new NoSuchElementException("No more CSV records 
available");
+                }
+            }
+
+            return next;
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    static List<String> createHeaderNames(final Map<String, Integer> 
headerMap) {
+        return headerMap == null ? null
+            : 
headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey)
+                .collect(Collectors.toList());
+    }
+
     /**
      * Creates a parser for the given {@link File}.
      *
@@ -229,6 +285,8 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
         return new CSVParser(reader, format);
     }
 
+    // the following objects are shared to reduce garbage
+
     /**
      * Creates a parser for the given {@link String}.
      *
@@ -277,13 +335,14 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
         return new CSVParser(new InputStreamReader(url.openStream(), charset), 
format);
     }
 
-    // the following objects are shared to reduce garbage
-
     private final CSVFormat format;
 
     /** A mapping of column names to column indices */
     private final Map<String, Integer> headerMap;
 
+    /** Preserve the column order to avoid re-computing it. */
+    private final List<String> headerNames;
+
     private final Lexer lexer;
 
     private final CSVRecordIterator csvRecordIterator;
@@ -349,14 +408,15 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
      */
     @SuppressWarnings("resource")
     public CSVParser(final Reader reader, final CSVFormat format, final long 
characterOffset, final long recordNumber)
-            throws IOException {
+        throws IOException {
         Assertions.notNull(reader, "reader");
         Assertions.notNull(format, "format");
 
         this.format = format;
         this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
         this.csvRecordIterator = new CSVRecordIterator();
-        this.headerMap = this.createHeaderMap();
+        this.headerMap = createHeaderMap(); // 1st
+        this.headerNames = createHeaderNames(this.headerMap); // 2nd
         this.characterOffset = characterOffset;
         this.recordNumber = recordNumber - 1;
     }
@@ -385,6 +445,53 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
     }
 
     /**
+     * Creates the name to index mapping if the format defines a header.
+     *
+     * @return null if the format has no header.
+     * @throws IOException if there is a problem reading the header or 
skipping the first record
+     */
+    private Map<String, Integer> createHeaderMap() throws IOException {
+        Map<String, Integer> hdrMap = null;
+        final String[] formatHeader = this.format.getHeader();
+        if (formatHeader != null) {
+            hdrMap = this.format.getIgnoreHeaderCase() ?
+                    new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
+                    new TreeMap<>();
+
+            String[] headerRecord = null;
+            if (formatHeader.length == 0) {
+                // read the header from the first line of the file
+                final CSVRecord nextRecord = this.nextRecord();
+                if (nextRecord != null) {
+                    headerRecord = nextRecord.values();
+                }
+            } else {
+                if (this.format.getSkipHeaderRecord()) {
+                    this.nextRecord();
+                }
+                headerRecord = formatHeader;
+            }
+
+            // build the name to index mappings
+            if (headerRecord != null) {
+                for (int i = 0; i < headerRecord.length; i++) {
+                    final String header = headerRecord[i];
+                    final boolean containsHeader = header == null ? false : 
hdrMap.containsKey(header);
+                    final boolean emptyHeader = header == null || 
header.trim().isEmpty();
+                    if (containsHeader && (!emptyHeader || 
!this.format.getAllowMissingColumnNames())) {
+                        throw new IllegalArgumentException("The header 
contains a duplicate name: \"" + header
+                                + "\" in " + Arrays.toString(headerRecord));
+                    }
+                    if (header != null) {
+                        hdrMap.put(header, Integer.valueOf(i));
+                    }
+                }
+            }
+        }
+        return hdrMap;
+    }
+
+    /**
      * Returns the current line number in the input stream.
      *
      * <p>
@@ -409,11 +516,11 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
     }
 
     /**
-     * Returns a copy of the header map that iterates in column order.
+     * Returns a copy of the header map.
      * <p>
      * The map keys are column names. The map values are 0-based indices.
      * </p>
-     * @return a copy of the header map that iterates in column order.
+     * @return a copy of the header map.
      */
     public Map<String, Integer> getHeaderMap() {
         return this.headerMap == null ? null : new 
LinkedHashMap<>(this.headerMap);
@@ -455,53 +562,6 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
     }
 
     /**
-     * Creates the name to index mapping if the format defines a header.
-     *
-     * @return null if the format has no header.
-     * @throws IOException if there is a problem reading the header or 
skipping the first record
-     */
-    private Map<String, Integer> createHeaderMap() throws IOException {
-        Map<String, Integer> hdrMap = null;
-        final String[] formatHeader = this.format.getHeader();
-        if (formatHeader != null) {
-            hdrMap = this.format.getIgnoreHeaderCase() ?
-                    new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
-                    new TreeMap<>();
-
-            String[] headerRecord = null;
-            if (formatHeader.length == 0) {
-                // read the header from the first line of the file
-                final CSVRecord nextRecord = this.nextRecord();
-                if (nextRecord != null) {
-                    headerRecord = nextRecord.values();
-                }
-            } else {
-                if (this.format.getSkipHeaderRecord()) {
-                    this.nextRecord();
-                }
-                headerRecord = formatHeader;
-            }
-
-            // build the name to index mappings
-            if (headerRecord != null) {
-                for (int i = 0; i < headerRecord.length; i++) {
-                    final String header = headerRecord[i];
-                    final boolean containsHeader = header == null ? false : 
hdrMap.containsKey(header);
-                    final boolean emptyHeader = header == null || 
header.trim().isEmpty();
-                    if (containsHeader && (!emptyHeader || 
!this.format.getAllowMissingColumnNames())) {
-                        throw new IllegalArgumentException("The header 
contains a duplicate name: \"" + header
-                                + "\" in " + Arrays.toString(headerRecord));
-                    }
-                    if (header != null) {
-                        hdrMap.put(header, Integer.valueOf(i));
-                    }
-                }
-            }
-        }
-        return hdrMap;
-    }
-
-    /**
      * Gets whether this parser is closed.
      *
      * @return whether this parser is closed.
@@ -527,55 +587,6 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
         return csvRecordIterator;
     }
 
-    class CSVRecordIterator implements Iterator<CSVRecord> {
-        private CSVRecord current;
-
-        private CSVRecord getNextRecord() {
-            try {
-                return CSVParser.this.nextRecord();
-            } catch (final IOException e) {
-                throw new IllegalStateException(
-                        e.getClass().getSimpleName() + " reading next record: 
" + e.toString(), e);
-            }
-        }
-
-        @Override
-        public boolean hasNext() {
-            if (CSVParser.this.isClosed()) {
-                return false;
-            }
-            if (this.current == null) {
-                this.current = this.getNextRecord();
-            }
-
-            return this.current != null;
-        }
-
-        @Override
-        public CSVRecord next() {
-            if (CSVParser.this.isClosed()) {
-                throw new NoSuchElementException("CSVParser has been closed");
-            }
-            CSVRecord next = this.current;
-            this.current = null;
-
-            if (next == null) {
-                // hasNext() wasn't called before
-                next = this.getNextRecord();
-                if (next == null) {
-                    throw new NoSuchElementException("No more CSV records 
available");
-                }
-            }
-
-            return next;
-        }
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException();
-        }
-    }
-
     /**
      * Parses the next record from the current point in the stream.
      *
@@ -622,8 +633,8 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
         if (!this.recordList.isEmpty()) {
             this.recordNumber++;
             final String comment = sb == null ? null : sb.toString();
-            result = new CSVRecord(this.recordList.toArray(new 
String[this.recordList.size()]), this.headerMap, comment,
-                    this.recordNumber, startCharPosition);
+            result = new CSVRecord(this.recordList.toArray(new 
String[this.recordList.size()]), this.headerMap,
+                this.headerNames, comment, this.recordNumber, 
startCharPosition);
         }
         return result;
     }
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java 
b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 34a3ba2..3984405 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -18,9 +18,10 @@
 package org.apache.commons.csv;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -40,7 +41,10 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
     private final String comment;
 
     /** The column name to index mapping. */
-    private final Map<String, Integer> mapping;
+    private final Map<String, Integer> headerMap;
+
+    /** The column order to avoid re-computing it. */
+    private final List<String> headerNames;
 
     /** The record number. */
     private final long recordNumber;
@@ -48,11 +52,12 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
     /** The values of the record */
     private final String[] values;
 
-    CSVRecord(final String[] values, final Map<String, Integer> mapping, final 
String comment, final long recordNumber,
-            final long characterPosition) {
+    CSVRecord(final String[] values, final Map<String, Integer> headerMap, 
List<String> headerNames, final String comment,
+            final long recordNumber, final long characterPosition) {
         this.recordNumber = recordNumber;
         this.values = values != null ? values : EMPTY_STRING_ARRAY;
-        this.mapping = mapping;
+        this.headerMap = headerMap;
+        this.headerNames = headerNames;
         this.comment = comment;
         this.characterPosition = characterPosition;
     }
@@ -93,14 +98,10 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      * @see CSVFormat#withNullString(String)
      */
     public String get(final String name) {
-        if (mapping == null) {
-            throw new IllegalStateException(
-                "No header mapping was specified, the record values can't be 
accessed by name");
-        }
-        final Integer index = mapping.get(name);
+        final Integer index = getIndex(name);
         if (index == null) {
             throw new IllegalArgumentException(String.format("Mapping for %s 
not found, expected one of %s", name,
-                mapping.keySet()));
+                headerMap.keySet()));
         }
         try {
             return values[index.intValue()];
@@ -134,6 +135,32 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
     }
 
     /**
+     * Returns a copy of the header names that iterates in column order.
+     * 
+     * @return a copy of the header names that iterates in column order.
+     * @since 1.7
+     */
+    public List<String> getHeaderNames() {
+        return new ArrayList<>(headerNames);
+    }
+
+    Integer getIndex(final String name) {
+        if (headerMap == null) {
+            throw new IllegalStateException(
+                "No header mapping was specified, the record values can't be 
accessed by name");
+        }
+        return headerMap.get(name);
+    }
+
+    String getName(final int index) {
+        if (headerMap == null) {
+            throw new IllegalStateException(
+                "No header mapping was specified, the record values can't be 
accessed by name");
+        }
+        return headerNames.get(index);
+    }
+
+    /**
      * Returns the number of this record in the parsed CSV file.
      *
      * <p>
@@ -149,20 +176,6 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
     }
 
     /**
-     * Tells whether the record size matches the header size.
-     *
-     * <p>
-     * Returns true if the sizes for this record match and false if not. Some 
programs can export files that fail this
-     * test but still produce parsable files.
-     * </p>
-     *
-     * @return true of this record is valid, false if not
-     */
-    public boolean isConsistent() {
-        return mapping == null || mapping.size() == values.length;
-    }
-
-    /**
      * Checks whether this record has a comment, false otherwise.
      * Note that comments are attached to the following record.
      * If there is no following record (i.e. the comment is at EOF)
@@ -176,6 +189,20 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
     }
 
     /**
+     * Tells whether the record size matches the header size.
+     *
+     * <p>
+     * Returns true if the sizes for this record match and false if not. Some 
programs can export files that fail this
+     * test but still produce parsable files.
+     * </p>
+     *
+     * @return true of this record is valid, false if not
+     */
+    public boolean isConsistent() {
+        return headerMap == null || headerMap.size() == values.length;
+    }
+
+    /**
      * Checks whether a given column is mapped, i.e. its name has been defined 
to the parser.
      *
      * @param name
@@ -183,7 +210,7 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      * @return whether a given column is mapped.
      */
     public boolean isMapped(final String name) {
-        return mapping != null && mapping.containsKey(name);
+        return headerMap != null && headerMap.containsKey(name);
     }
 
     /**
@@ -194,7 +221,7 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      * @return whether a given columns is mapped and has a value
      */
     public boolean isSet(final String name) {
-        return isMapped(name) && mapping.get(name).intValue() < values.length;
+        return isMapped(name) && headerMap.get(name).intValue() < 
values.length;
     }
 
     /**
@@ -215,10 +242,10 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      * @return the given map.
      */
     <M extends Map<String, String>> M putIn(final M map) {
-        if (mapping == null) {
+        if (headerMap == null) {
             return map;
         }
-        for (final Entry<String, Integer> entry : mapping.entrySet()) {
+        for (final Entry<String, Integer> entry : headerMap.entrySet()) {
             final int col = entry.getValue().intValue();
             if (col < values.length) {
                 map.put(entry.getKey(), values[col]);
@@ -253,7 +280,7 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      * @return A new Map. The map is empty if the record has no headers.
      */
     public Map<String, String> toMap() {
-        return putIn(new HashMap<String, String>(values.length));
+        return putIn(new LinkedHashMap<String, String>(values.length));
     }
 
     /**
@@ -264,7 +291,7 @@ public final class CSVRecord implements Serializable, 
Iterable<String> {
      */
     @Override
     public String toString() {
-        return "CSVRecord [comment=" + comment + ", mapping=" + mapping +
+        return "CSVRecord [comment=" + comment + ", mapping=" + headerMap +
                 ", recordNumber=" + recordNumber + ", values=" +
                 Arrays.toString(values) + "]";
     }
diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java 
b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
index 6347cc5..dab4d55 100644
--- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
@@ -26,6 +26,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
@@ -40,17 +41,17 @@ public class CSVRecordTest {
 
     private String[] values;
     private CSVRecord record, recordWithHeader;
-    private Map<String, Integer> header;
+    private Map<String, Integer> headerMap;
 
     @Before
     public void setUp() throws Exception {
         values = new String[] { "A", "B", "C" };
-        record = new CSVRecord(values, null, null, 0, -1);
-        header = new HashMap<>();
-        header.put("first", Integer.valueOf(0));
-        header.put("second", Integer.valueOf(1));
-        header.put("third", Integer.valueOf(2));
-        recordWithHeader = new CSVRecord(values, header, null, 0, -1);
+        record = new CSVRecord(values, null, null, null, 0, -1);
+        headerMap = new HashMap<>();
+        headerMap.put("first", Integer.valueOf(0));
+        headerMap.put("second", Integer.valueOf(1));
+        headerMap.put("third", Integer.valueOf(2));
+        recordWithHeader = new CSVRecord(values, headerMap, 
CSVParser.createHeaderNames(headerMap), null, 0, -1);
     }
 
     @Test
@@ -69,7 +70,7 @@ public class CSVRecordTest {
 
     @Test(expected = IllegalArgumentException.class)
     public void testGetStringInconsistentRecord() {
-        header.put("fourth", Integer.valueOf(4));
+        headerMap.put("fourth", Integer.valueOf(4));
         recordWithHeader.get("fourth");
     }
 
@@ -103,7 +104,7 @@ public class CSVRecordTest {
         assertTrue(record.isConsistent());
         assertTrue(recordWithHeader.isConsistent());
 
-        header.put("fourth", Integer.valueOf(4));
+        headerMap.put("fourth", Integer.valueOf(4));
         assertFalse(recordWithHeader.isConsistent());
     }
 
@@ -162,6 +163,18 @@ public class CSVRecordTest {
     }
 
     @Test
+    public void testGetHeaderNames() {
+        final Map<String, String> nameValueMap = this.recordWithHeader.toMap();
+        final List<String> headerNames = 
this.recordWithHeader.getHeaderNames();
+        Assert.assertEquals(nameValueMap.size(), headerNames.size());
+        for (int i = 0; i < headerNames.size(); i++) {
+            String name = headerNames.get(i);
+            Assert.assertEquals(i, 
this.recordWithHeader.getIndex(name).intValue());
+            Assert.assertEquals(name, this.recordWithHeader.getName(i));
+        }
+    }
+
+    @Test
     public void testToMapWithShortRecord() throws Exception {
         try (final CSVParser parser = CSVParser.parse("a,b", 
CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
             final CSVRecord shortRec = parser.iterator().next();

[commons-csv] branch master updated: [CSV-239] Cannot get headers in column order from CSVRecord.

Reply via email to