This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push: new ef51c1e [CSV-239] Cannot get headers in column order from CSVRecord. ef51c1e is described below commit ef51c1ecad91aad5527b8e1568e2a17748039be8 Author: Gary Gregory <gardgreg...@gmail.com> AuthorDate: Sun May 19 19:49:23 2019 -0400 [CSV-239] Cannot get headers in column order from CSVRecord. - Redo so that the record tracks its source parser where call sites can find metadata. This avoids adding slots to the record class itself. --- .../java/org/apache/commons/csv/CSVParser.java | 43 +++++++++++--- .../java/org/apache/commons/csv/CSVRecord.java | 67 +++++++++------------- .../java/org/apache/commons/csv/CSVParserTest.java | 34 +++++++---- .../org/apache/commons/csv/CSVPrinterTest.java | 2 +- .../java/org/apache/commons/csv/CSVRecordTest.java | 52 +++++++++-------- .../org/apache/commons/csv/PerformanceTest.java | 4 ++ 6 files changed, 121 insertions(+), 81 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 6eb97f8..7fd8292 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -340,7 +340,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { /** A mapping of column names to column indices */ private final Map<String, Integer> headerMap; - /** Preserve the column order to avoid re-computing it. */ + /** The column order to avoid re-computing it. */ private final List<String> headerNames; private final Lexer lexer; @@ -444,6 +444,12 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } } + private Map<String, Integer> createEmptyHeaderMap() { + return this.format.getIgnoreHeaderCase() ? + new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : + new TreeMap<>(); + } + /** * Creates the name to index mapping if the format defines a header. * @@ -454,10 +460,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { Map<String, Integer> hdrMap = null; final String[] formatHeader = this.format.getHeader(); if (formatHeader != null) { - hdrMap = this.format.getIgnoreHeaderCase() ? - new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : - new TreeMap<>(); - + hdrMap = createEmptyHeaderMap(); String[] headerRecord = null; if (formatHeader.length == 0) { // read the header from the first line of the file @@ -523,7 +526,31 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { * @return a copy of the header map. */ public Map<String, Integer> getHeaderMap() { - return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap); + if (this.headerMap == null) { + return null; + } + final Map<String, Integer> map = createEmptyHeaderMap(); + map.putAll(this.headerMap); + return map; + } + + /** + * Returns the header map. + * + * @return the header map. + */ + Map<String, Integer> getHeaderMapRaw() { + return this.headerMap; + } + + /** + * Returns a copy of the header names that iterates in column order. + * + * @return a copy of the header names that iterates in column order. + * @since 1.7 + */ + public List<String> getHeaderNames() { + return new ArrayList<>(headerNames); } /** @@ -633,8 +660,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { if (!this.recordList.isEmpty()) { this.recordNumber++; final String comment = sb == null ? null : sb.toString(); - result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, - this.headerNames, comment, this.recordNumber, startCharPosition); + result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]), + comment, this.recordNumber, startCharPosition); } return result; } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 3984405..a88e3cb 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -18,7 +18,6 @@ package org.apache.commons.csv; import java.io.Serializable; -import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; @@ -40,24 +39,20 @@ public final class CSVRecord implements Serializable, Iterable<String> { /** The accumulated comments (if any) */ private final String comment; - /** The column name to index mapping. */ - private final Map<String, Integer> headerMap; - - /** The column order to avoid re-computing it. */ - private final List<String> headerNames; - /** The record number. */ private final long recordNumber; /** The values of the record */ private final String[] values; - CSVRecord(final String[] values, final Map<String, Integer> headerMap, List<String> headerNames, final String comment, - final long recordNumber, final long characterPosition) { + /** The parser that originates this record. */ + private final CSVParser parser; + + CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, + final long characterPosition) { this.recordNumber = recordNumber; this.values = values != null ? values : EMPTY_STRING_ARRAY; - this.headerMap = headerMap; - this.headerNames = headerNames; + this.parser = parser; this.comment = comment; this.characterPosition = characterPosition; } @@ -84,6 +79,10 @@ public final class CSVRecord implements Serializable, Iterable<String> { return values[i]; } + private Map<String, Integer> getHeaderMapRaw() { + return parser.getHeaderMapRaw(); + } + /** * Returns a value by name. * @@ -98,7 +97,12 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @see CSVFormat#withNullString(String) */ public String get(final String name) { - final Integer index = getIndex(name); + final Map<String, Integer> headerMap = getHeaderMapRaw(); + if (headerMap == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + final Integer index = headerMap.get(name); if (index == null) { throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, headerMap.keySet())); @@ -135,29 +139,13 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** - * Returns a copy of the header names that iterates in column order. - * - * @return a copy of the header names that iterates in column order. + * Returns the parser. + * + * @return the parser. * @since 1.7 */ - public List<String> getHeaderNames() { - return new ArrayList<>(headerNames); - } - - Integer getIndex(final String name) { - if (headerMap == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - return headerMap.get(name); - } - - String getName(final int index) { - if (headerMap == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - return headerNames.get(index); + public CSVParser getCSVParser() { + return parser; } /** @@ -199,6 +187,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return true of this record is valid, false if not */ public boolean isConsistent() { + final Map<String, Integer> headerMap = getHeaderMapRaw(); return headerMap == null || headerMap.size() == values.length; } @@ -210,6 +199,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return whether a given column is mapped. */ public boolean isMapped(final String name) { + final Map<String, Integer> headerMap = getHeaderMapRaw(); return headerMap != null && headerMap.containsKey(name); } @@ -221,7 +211,7 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return whether a given columns is mapped and has a value */ public boolean isSet(final String name) { - return isMapped(name) && headerMap.get(name).intValue() < values.length; + return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; } /** @@ -242,10 +232,10 @@ public final class CSVRecord implements Serializable, Iterable<String> { * @return the given map. */ <M extends Map<String, String>> M putIn(final M map) { - if (headerMap == null) { + if (getHeaderMapRaw() == null) { return map; } - for (final Entry<String, Integer> entry : headerMap.entrySet()) { + for (final Entry<String, Integer> entry : getHeaderMapRaw().entrySet()) { final int col = entry.getValue().intValue(); if (col < values.length) { map.put(entry.getKey(), values[col]); @@ -291,9 +281,8 @@ public final class CSVRecord implements Serializable, Iterable<String> { */ @Override public String toString() { - return "CSVRecord [comment=" + comment + ", mapping=" + headerMap + - ", recordNumber=" + recordNumber + ", values=" + - Arrays.toString(values) + "]"; + return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + + Arrays.toString(values) + "]"; } String[] values() { diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index a5539f6..530df93 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -491,6 +491,20 @@ public class CSVParserTest { } @Test + public void testGetHeaderNames() throws IOException { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", + CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + final Map<String, Integer> nameIndexMap = parser.getHeaderMap(); + final List<String> headerNames = parser.getHeaderNames(); + Assert.assertEquals(nameIndexMap.size(), headerNames.size()); + for (int i = 0; i < headerNames.size(); i++) { + final String name = headerNames.get(i); + Assert.assertEquals(i, nameIndexMap.get(name).intValue()); + } + } + } + + @Test public void testGetLine() throws IOException { try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { for (final String[] re : RESULT) { @@ -678,9 +692,9 @@ public class CSVParserTest { @Test public void testIgnoreCaseHeaderMapping() throws Exception { - final Reader in = new StringReader("1,2,3"); + final Reader reader = new StringReader("1,2,3"); final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase() - .parse(in).iterator(); + .parse(reader).iterator(); final CSVRecord record = records.next(); assertEquals("1", record.get("one")); assertEquals("2", record.get("two")); @@ -742,10 +756,10 @@ public class CSVParserTest { // Iterator hasNext() shouldn't break sequence CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); int recordNumber = 0; - Iterator<CSVRecord> iter = parser.iterator(); + final Iterator<CSVRecord> iter = parser.iterator(); recordNumber = 0; while (iter.hasNext()) { - CSVRecord record = iter.next(); + final CSVRecord record = iter.next(); recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); if (recordNumber >= 2) { @@ -754,7 +768,7 @@ public class CSVParserTest { } iter.hasNext(); while (iter.hasNext()) { - CSVRecord record = iter.next(); + final CSVRecord record = iter.next(); recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); } @@ -762,14 +776,14 @@ public class CSVParserTest { // Consecutive enhanced for loops shouldn't break sequence parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); recordNumber = 0; - for (CSVRecord record : parser) { + for (final CSVRecord record : parser) { recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); if (recordNumber >= 2) { break; } } - for (CSVRecord record : parser) { + for (final CSVRecord record : parser) { recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); } @@ -777,7 +791,7 @@ public class CSVParserTest { // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); recordNumber = 0; - for (CSVRecord record : parser) { + for (final CSVRecord record : parser) { recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); if (recordNumber >= 2) { @@ -785,7 +799,7 @@ public class CSVParserTest { } } parser.iterator().hasNext(); - for (CSVRecord record : parser) { + for (final CSVRecord record : parser) { recordNumber++; assertEquals(String.valueOf(recordNumber), record.get(0)); } @@ -799,7 +813,7 @@ public class CSVParserTest { assertEquals(4, records.size()); } } - + @Test public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index 6a0e3af..7fe6c5c 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -82,7 +82,7 @@ public class CSVPrinterTest { } private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); - private String longText2; + private String longText2; private void doOneRandom(final CSVFormat format) throws Exception { final Random r = new Random(); diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index dab4d55..ea4b797 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -23,21 +23,23 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.lang3.StringUtils; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class CSVRecordTest { - private enum EnumFixture { UNKNOWN_COLUMN } + private enum EnumFixture { + UNKNOWN_COLUMN + } private String[] values; private CSVRecord record, recordWithHeader; @@ -46,12 +48,15 @@ public class CSVRecordTest { @Before public void setUp() throws Exception { values = new String[] { "A", "B", "C" }; - record = new CSVRecord(values, null, null, null, 0, -1); - headerMap = new HashMap<>(); - headerMap.put("first", Integer.valueOf(0)); - headerMap.put("second", Integer.valueOf(1)); - headerMap.put("third", Integer.valueOf(2)); - recordWithHeader = new CSVRecord(values, headerMap, CSVParser.createHeaderNames(headerMap), null, 0, -1); + final String rowData = StringUtils.join(values, ','); + try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData))) { + record = parser.iterator().next(); + } + final String[] headers = { "first", "second", "third" }; + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) { + recordWithHeader = parser.iterator().next(); + headerMap = parser.getHeaderMap(); + } } @Test @@ -103,9 +108,22 @@ public class CSVRecordTest { public void testIsConsistent() { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); + final Map<String, Integer> map = recordWithHeader.getCSVParser().getHeaderMap(); + map.put("fourth", Integer.valueOf(4)); + // We are working on a copy of the map, so the record should still be OK. + assertTrue(recordWithHeader.isConsistent()); + } - headerMap.put("fourth", Integer.valueOf(4)); - assertFalse(recordWithHeader.isConsistent()); + @Test + public void testIsInconsistent() throws IOException { + final String[] headers = { "first", "second", "third" }; + final String rowData = StringUtils.join(values, ','); + try (final CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) { + final Map<String, Integer> map = parser.getHeaderMapRaw(); + final CSVRecord record1 = parser.iterator().next(); + map.put("fourth", Integer.valueOf(4)); + assertFalse(record1.isConsistent()); + } } @Test @@ -163,18 +181,6 @@ public class CSVRecordTest { } @Test - public void testGetHeaderNames() { - final Map<String, String> nameValueMap = this.recordWithHeader.toMap(); - final List<String> headerNames = this.recordWithHeader.getHeaderNames(); - Assert.assertEquals(nameValueMap.size(), headerNames.size()); - for (int i = 0; i < headerNames.size(); i++) { - String name = headerNames.get(i); - Assert.assertEquals(i, this.recordWithHeader.getIndex(name).intValue()); - Assert.assertEquals(name, this.recordWithHeader.getName(i)); - } - } - - @Test public void testToMapWithShortRecord() throws Exception { try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { final CSVRecord shortRec = parser.iterator().next(); diff --git a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index 9ab7071..1727b79 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -246,6 +246,7 @@ public class PerformanceTest { private static void testParseCommonsCSV() throws Exception { testParser("CSV", new CSVParserFactory() { + @Override public CSVParser createParser() throws IOException { return new CSVParser(createReader(), format); } @@ -254,6 +255,7 @@ public class PerformanceTest { private static void testParsePath() throws Exception { testParser("CSV-PATH", new CSVParserFactory() { + @Override public CSVParser createParser() throws IOException { return CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format); } @@ -262,6 +264,7 @@ public class PerformanceTest { private static void testParsePathDoubleBuffering() throws Exception { testParser("CSV-PATH-DB", new CSVParserFactory() { + @Override public CSVParser createParser() throws IOException { return CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format); } @@ -270,6 +273,7 @@ public class PerformanceTest { private static void testParseURL() throws Exception { testParser("CSV-URL", new CSVParserFactory() { + @Override public CSVParser createParser() throws IOException { //NOTE: URL will always return a BufferedInputStream. return CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format);