This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push: new 788f2aa [CSV-239] Cannot get headers in column order from CSVRecord. 788f2aa is described below commit 788f2aaa7aff361416e11fba5df7cfc31060cbb1 Author: Gary Gregory <gardgreg...@gmail.com> AuthorDate: Sun May 19 10:26:27 2019 -0400 [CSV-239] Cannot get headers in column order from CSVRecord. --- src/changes/changes.xml | 1 + .../java/org/apache/commons/csv/CSVParser.java | 219 +++++++++++---------- .../java/org/apache/commons/csv/CSVRecord.java | 89 ++++++--- .../java/org/apache/commons/csv/CSVRecordTest.java | 31 ++- 4 files changed, 196 insertions(+), 144 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index ce721ef..2fa9369 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ <action issue="CSV-234" type="add" dev="ggregory" due-to="Roberto Benedetti, Gary Gregory">Add support for java.sql.Clob.</action> <action issue="CSV-237" type="update" dev="ggregory" due-to="Gary Gregory">Update to Java 8.</action> <action issue="CSV-238" type="fix" dev="ggregory" due-to="Stephen Olander-Waters">Escape quotes in CLOBs #39.</action> + <action issue="CSV-239" type="add" dev="ggregory" due-to="Gary Gregory, Dave Moten">Cannot get headers in column order from CSVRecord.</action> <action type="update" dev="ggregory" due-to="Gary Gregory">Update tests from H2 1.4.198 to 1.4.199.</action> </release> <release version="1.6" date="2018-09-22" description="Feature and bug fix release (Java 7)"> diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index a3decdf..6eb97f8 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.TreeMap; +import java.util.stream.Collectors; /** * Parses CSV files according to the specified format. @@ -133,6 +134,61 @@ import java.util.TreeMap; */ public final class CSVParser implements Iterable<CSVRecord>, Closeable { + class CSVRecordIterator implements Iterator<CSVRecord> { + private CSVRecord current; + + private CSVRecord getNextRecord() { + try { + return CSVParser.this.nextRecord(); + } catch (final IOException e) { + throw new IllegalStateException( + e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); + } + } + + @Override + public boolean hasNext() { + if (CSVParser.this.isClosed()) { + return false; + } + if (this.current == null) { + this.current = this.getNextRecord(); + } + + return this.current != null; + } + + @Override + public CSVRecord next() { + if (CSVParser.this.isClosed()) { + throw new NoSuchElementException("CSVParser has been closed"); + } + CSVRecord next = this.current; + this.current = null; + + if (next == null) { + // hasNext() wasn't called before + next = this.getNextRecord(); + if (next == null) { + throw new NoSuchElementException("No more CSV records available"); + } + } + + return next; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + static List<String> createHeaderNames(final Map<String, Integer> headerMap) { + return headerMap == null ? null + : headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey) + .collect(Collectors.toList()); + } + /** * Creates a parser for the given {@link File}. * @@ -229,6 +285,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { return new CSVParser(reader, format); } + // the following objects are shared to reduce garbage + /** * Creates a parser for the given {@link String}. * @@ -277,13 +335,14 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { return new CSVParser(new InputStreamReader(url.openStream(), charset), format); } - // the following objects are shared to reduce garbage - private final CSVFormat format; /** A mapping of column names to column indices */ private final Map<String, Integer> headerMap; + /** Preserve the column order to avoid re-computing it. */ + private final List<String> headerNames; + private final Lexer lexer; private final CSVRecordIterator csvRecordIterator; @@ -349,14 +408,15 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { */ @SuppressWarnings("resource") public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { + throws IOException { Assertions.notNull(reader, "reader"); Assertions.notNull(format, "format"); this.format = format; this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); this.csvRecordIterator = new CSVRecordIterator(); - this.headerMap = this.createHeaderMap(); + this.headerMap = createHeaderMap(); // 1st + this.headerNames = createHeaderNames(this.headerMap); // 2nd this.characterOffset = characterOffset; this.recordNumber = recordNumber - 1; } @@ -385,6 +445,53 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** + * Creates the name to index mapping if the format defines a header. + * + * @return null if the format has no header. + * @throws IOException if there is a problem reading the header or skipping the first record + */ + private Map<String, Integer> createHeaderMap() throws IOException { + Map<String, Integer> hdrMap = null; + final String[] formatHeader = this.format.getHeader(); + if (formatHeader != null) { + hdrMap = this.format.getIgnoreHeaderCase() ? + new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : + new TreeMap<>(); + + String[] headerRecord = null; + if (formatHeader.length == 0) { + // read the header from the first line of the file + final CSVRecord nextRecord = this.nextRecord(); + if (nextRecord != null) { + headerRecord = nextRecord.values(); + } + } else { + if (this.format.getSkipHeaderRecord()) { + this.nextRecord(); + } + headerRecord = formatHeader; + } + + // build the name to index mappings + if (headerRecord != null) { + for (int i = 0; i < headerRecord.length; i++) { + final String header = headerRecord[i]; + final boolean containsHeader = header == null ? false : hdrMap.containsKey(header); + final boolean emptyHeader = header == null || header.trim().isEmpty(); + if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) { + throw new IllegalArgumentException("The header contains a duplicate name: \"" + header + + "\" in " + Arrays.toString(headerRecord)); + } + if (header != null) { + hdrMap.put(header, Integer.valueOf(i)); + } + } + } + } + return hdrMap; + } + + /** * Returns the current line number in the input stream. * * <p> @@ -409,11 +516,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** - * Returns a copy of the header map that iterates in column order. + * Returns a copy of the header map. * <p> * The map keys are column names. The map values are 0-based indices. * </p> - * @return a copy of the header map that iterates in column order. + * @return a copy of the header map. */ public Map<String, Integer> getHeaderMap() { return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap); @@ -455,53 +562,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** - * Creates the name to index mapping if the format defines a header. - * - * @return null if the format has no header. - * @throws IOException if there is a problem reading the header or skipping the first record - */ - private Map<String, Integer> createHeaderMap() throws IOException { - Map<String, Integer> hdrMap = null; - final String[] formatHeader = this.format.getHeader(); - if (formatHeader != null) { - hdrMap = this.format.getIgnoreHeaderCase() ? - new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : - new TreeMap<>(); - - String[] headerRecord = null; - if (formatHeader.length == 0) { - // read the header from the first line of the file - final CSVRecord nextRecord = this.nextRecord(); - if (nextRecord != null) { - headerRecord = nextRecord.values(); - } - } else { - if (this.format.getSkipHeaderRecord()) { - this.nextRecord(); - } - headerRecord = formatHeader; - } - - // build the name to index mappings - if (headerRecord != null) { - for (int i = 0; i < headerRecord.length; i++) { - final String header = headerRecord[i]; - final boolean containsHeader = header == null ? false : hdrMap.containsKey(header); - final boolean emptyHeader = header == null || header.trim().isEmpty(); - if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) { - throw new IllegalArgumentException("The header contains a duplicate name: \"" + header - + "\" in " + Arrays.toString(headerRecord)); - } - if (header != null) { - hdrMap.put(header, Integer.valueOf(i)); - } - } - } - } - return hdrMap; - } - - /** * Gets whether this parser is closed. * * @return whether this parser is closed. @@ -527,55 +587,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { return csvRecordIterator; } - class CSVRecordIterator implements Iterator<CSVRecord> { - private CSVRecord current; - - private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new IllegalStateException( - e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); - } - } - - @Override - public boolean hasNext() { - if (CSVParser.this.isClosed()) { - return false; - } - if (this.current == null) { - this.current = this.getNextRecord(); - } - - return this.current != null; - } - - @Override - public CSVRecord next() { - if (CSVParser.this.isClosed()) { - throw new NoSuchElementException("CSVParser has been closed"); - } - CSVRecord next = this.current; - this.current = null; - - if (next == null) { - // hasNext() wasn't called before - next = this.getNextRecord(); - if (next == null) { - throw new NoSuchElementException("No more CSV records available"); - } - } - - return next; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - /** * Parses the next record from the current point in the stream. * @@ -622,8 +633,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { if (!this.recordList.isEmpty()) { this.recordNumber++; final String comment = sb == null ? null : sb.toString(); - result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, comment, - this.recordNumber, startCharPosition); + result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, + this.headerNames, comment, this.recordNumber, startCharPosition); } return result; } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 34a3ba2..3984405 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -18,9 +18,10 @@ package org.apache.commons.csv; import java.io.Serializable; +import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -40,7 +41,10 @@ public final class CSVRecord implements Serializable, Iterable<String> { private final String comment; /** The column name to index mapping. */ - private final Map<String, Integer> mapping; + private final Map<String, Integer> headerMap; + + /** The column order to avoid re-computing it. */ + private final List<String> headerNames; /** The record number. */ private final long recordNumber; @@ -48,11 +52,12 @@ public final class CSVRecord implements Serializable, Iterable<String> { /** The values of the record */ private final String[] values; - CSVRecord(final String[] values, final Map<String, Integer> mapping, final String comment, final long recordNumber, - final long characterPosition) { + CSVRecord(final String[] values, final Map<String, Integer> headerMap, List<String> headerNames, final String comment, + final long recordNumber, final long characterPosition) { this.recordNumber = recordNumber; this.values = values != null ? values : EMPTY_STRING_ARRAY; - this.mapping = mapping; + this.headerMap = headerMap; + this.headerNames = headerNames; this.comment = comment; this.characterPosition = characterPosition; } @@ -93,14 +98,10 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @see CSVFormat#withNullString(String) */ public String get(final String name) { - if (mapping == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - final Integer index = mapping.get(name); + final Integer index = getIndex(name); if (index == null) { throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - mapping.keySet())); + headerMap.keySet())); } try { return values[index.intValue()]; @@ -134,6 +135,32 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** + * Returns a copy of the header names that iterates in column order. + * + * @return a copy of the header names that iterates in column order. + * @since 1.7 + */ + public List<String> getHeaderNames() { + return new ArrayList<>(headerNames); + } + + Integer getIndex(final String name) { + if (headerMap == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + return headerMap.get(name); + } + + String getName(final int index) { + if (headerMap == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + return headerNames.get(index); + } + + /** * Returns the number of this record in the parsed CSV file. * * <p> @@ -149,20 +176,6 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** - * Tells whether the record size matches the header size. - * - * <p> - * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this - * test but still produce parsable files. - * </p> - * - * @return true of this record is valid, false if not - */ - public boolean isConsistent() { - return mapping == null || mapping.size() == values.length; - } - - /** * Checks whether this record has a comment, false otherwise. * Note that comments are attached to the following record. * If there is no following record (i.e. the comment is at EOF) @@ -176,6 +189,20 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** + * Tells whether the record size matches the header size. + * + * <p> + * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this + * test but still produce parsable files. + * </p> + * + * @return true of this record is valid, false if not + */ + public boolean isConsistent() { + return headerMap == null || headerMap.size() == values.length; + } + + /** * Checks whether a given column is mapped, i.e. its name has been defined to the parser. * * @param name @@ -183,7 +210,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return whether a given column is mapped. */ public boolean isMapped(final String name) { - return mapping != null && mapping.containsKey(name); + return headerMap != null && headerMap.containsKey(name); } /** @@ -194,7 +221,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return whether a given columns is mapped and has a value */ public boolean isSet(final String name) { - return isMapped(name) && mapping.get(name).intValue() < values.length; + return isMapped(name) && headerMap.get(name).intValue() < values.length; } /** @@ -215,10 +242,10 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return the given map. */ <M extends Map<String, String>> M putIn(final M map) { - if (mapping == null) { + if (headerMap == null) { return map; } - for (final Entry<String, Integer> entry : mapping.entrySet()) { + for (final Entry<String, Integer> entry : headerMap.entrySet()) { final int col = entry.getValue().intValue(); if (col < values.length) { map.put(entry.getKey(), values[col]); @@ -253,7 +280,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return A new Map. The map is empty if the record has no headers. */ public Map<String, String> toMap() { - return putIn(new HashMap<String, String>(values.length)); + return putIn(new LinkedHashMap<String, String>(values.length)); } /** @@ -264,7 +291,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { */ @Override public String toString() { - return "CSVRecord [comment=" + comment + ", mapping=" + mapping + + return "CSVRecord [comment=" + comment + ", mapping=" + headerMap + ", recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]"; } diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 6347cc5..dab4d55 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; @@ -40,17 +41,17 @@ public class CSVRecordTest { private String[] values; private CSVRecord record, recordWithHeader; - private Map<String, Integer> header; + private Map<String, Integer> headerMap; @Before public void setUp() throws Exception { values = new String[] { "A", "B", "C" }; - record = new CSVRecord(values, null, null, 0, -1); - header = new HashMap<>(); - header.put("first", Integer.valueOf(0)); - header.put("second", Integer.valueOf(1)); - header.put("third", Integer.valueOf(2)); - recordWithHeader = new CSVRecord(values, header, null, 0, -1); + record = new CSVRecord(values, null, null, null, 0, -1); + headerMap = new HashMap<>(); + headerMap.put("first", Integer.valueOf(0)); + headerMap.put("second", Integer.valueOf(1)); + headerMap.put("third", Integer.valueOf(2)); + recordWithHeader = new CSVRecord(values, headerMap, CSVParser.createHeaderNames(headerMap), null, 0, -1); } @Test @@ -69,7 +70,7 @@ public class CSVRecordTest { @Test(expected = IllegalArgumentException.class) public void testGetStringInconsistentRecord() { - header.put("fourth", Integer.valueOf(4)); + headerMap.put("fourth", Integer.valueOf(4)); recordWithHeader.get("fourth"); } @@ -103,7 +104,7 @@ public class CSVRecordTest { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); - header.put("fourth", Integer.valueOf(4)); + headerMap.put("fourth", Integer.valueOf(4)); assertFalse(recordWithHeader.isConsistent()); } @@ -162,6 +163,18 @@ public class CSVRecordTest { } @Test + public void testGetHeaderNames() { + final Map<String, String> nameValueMap = this.recordWithHeader.toMap(); + final List<String> headerNames = this.recordWithHeader.getHeaderNames(); + Assert.assertEquals(nameValueMap.size(), headerNames.size()); + for (int i = 0; i < headerNames.size(); i++) { + String name = headerNames.get(i); + Assert.assertEquals(i, this.recordWithHeader.getIndex(name).intValue()); + Assert.assertEquals(name, this.recordWithHeader.getName(i)); + } + } + + @Test public void testToMapWithShortRecord() throws Exception { try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final CSVRecord shortRec = parser.iterator().next();