This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git


The following commit(s) were added to refs/heads/master by this push:
     new 4fdfc59  CSV-264: Added DuplicateHeaderMode for flexibility with 
header strictness. (#114)
4fdfc59 is described below

commit 4fdfc59b1cb668cb110b074742a1e760819f719b
Author: Seth Falco <s...@falco.fun>
AuthorDate: Sat Feb 19 18:07:44 2022 +0100

    CSV-264: Added DuplicateHeaderMode for flexibility with header strictness. 
(#114)
    
    * csv-264: added duplicateheadermode for flexibility with header strictness
    
    * fix: use assertthrows and update docs
---
 .../java/org/apache/commons/csv/CSVFormat.java     | 68 ++++++++++++-----
 .../java/org/apache/commons/csv/CSVParser.java     | 10 ++-
 .../apache/commons/csv/DuplicateHeaderMode.java    | 42 ++++++++++
 .../checkstyle/checkstyle-suppressions.xml         |  2 +-
 .../java/org/apache/commons/csv/CSVFormatTest.java | 13 ++++
 .../apache/commons/csv/issues/JiraCsv264Test.java  | 89 ++++++++++++++++++++++
 6 files changed, 200 insertions(+), 24 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java 
b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 6bf3ed1..77a009d 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -188,8 +188,6 @@ public final class CSVFormat implements Serializable {
             return new Builder(csvFormat);
         }
 
-        private boolean allowDuplicateHeaderNames;
-
         private boolean allowMissingColumnNames;
 
         private boolean autoFlush;
@@ -198,6 +196,8 @@ public final class CSVFormat implements Serializable {
 
         private String delimiter;
 
+        private DuplicateHeaderMode duplicateHeaderMode;
+
         private Character escapeCharacter;
 
         private String[] headerComments;
@@ -245,7 +245,7 @@ public final class CSVFormat implements Serializable {
             this.trim = csvFormat.trim;
             this.autoFlush = csvFormat.autoFlush;
             this.quotedNullString = csvFormat.quotedNullString;
-            this.allowDuplicateHeaderNames = 
csvFormat.allowDuplicateHeaderNames;
+            this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
         }
 
         /**
@@ -262,13 +262,27 @@ public final class CSVFormat implements Serializable {
          *
          * @param allowDuplicateHeaderNames the duplicate header names 
behavior, true to allow, false to disallow.
          * @return This instance.
+         * @deprecated Use {@link 
#setDuplicateHeaderMode(DuplicateHeaderMode)}.
          */
+        @Deprecated
         public Builder setAllowDuplicateHeaderNames(final boolean 
allowDuplicateHeaderNames) {
-            this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+            final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? 
DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
+            setDuplicateHeaderMode(mode);
             return this;
         }
 
         /**
+         * Sets the duplicate header names behavior.
+         *
+         * @param duplicateHeaderMode the duplicate header names behavior
+         * @return This instance.
+         */
+        public Builder setDuplicateHeaderMode(final DuplicateHeaderMode 
duplicateHeaderMode) {
+          this.duplicateHeaderMode = duplicateHeaderMode;
+          return this;
+        }
+
+        /**
          * Sets the missing column names behavior, {@code true} to allow 
missing column names in the header line, {@code false} to cause an
          * {@link IllegalArgumentException} to be thrown.
          *
@@ -760,7 +774,8 @@ public final class CSVFormat implements Serializable {
     }
 
     /**
-     * Standard Comma Separated Value format, as for {@link #RFC4180} but 
allowing empty lines.
+     * Standard Comma Separated Value format, as for {@link #RFC4180} but 
allowing
+     * empty lines.
      *
      * <p>
      * The {@link Builder} settings are:
@@ -770,13 +785,13 @@ public final class CSVFormat implements Serializable {
      * <li>{@code setQuote('"')}</li>
      * <li>{@code setRecordSeparator("\r\n")}</li>
      * <li>{@code setIgnoreEmptyLines(true)}</li>
-     * <li>{@code setAllowDuplicateHeaderNames(true)}</li>
+     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
      * </ul>
      *
      * @see Predefined#Default
      */
     public static final CSVFormat DEFAULT = new CSVFormat(COMMA, 
DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, 
false, false, false,
-            false, false, false, true);
+            false, false, false, DuplicateHeaderMode.ALLOW_ALL);
 
     /**
      * Excel file format (using a comma as the value delimiter). Note that the 
actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -799,7 +814,7 @@ public final class CSVFormat implements Serializable {
      * <li>{@code setRecordSeparator("\r\n")}</li>
      * <li>{@code setIgnoreEmptyLines(false)}</li>
      * <li>{@code setAllowMissingColumnNames(true)}</li>
-     * <li>{@code setAllowDuplicateHeaderNames(true)}</li>
+     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
      * </ul>
      * <p>
      * Note: This is currently like {@link #RFC4180} plus {@link 
Builder#setAllowMissingColumnNames(boolean) 
Builder#setAllowMissingColumnNames(true)} and
@@ -1220,7 +1235,7 @@ public final class CSVFormat implements Serializable {
      */
     public static CSVFormat newFormat(final char delimiter) {
         return new CSVFormat(String.valueOf(delimiter), null, null, null, 
null, false, false, null, null, null, null, false, false, false, false, false, 
false,
-                true);
+                DuplicateHeaderMode.ALLOW_ALL);
     }
 
     static String[] toStringArray(final Object[] values) {
@@ -1262,7 +1277,7 @@ public final class CSVFormat implements Serializable {
         return CSVFormat.Predefined.valueOf(format).getFormat();
     }
 
-    private final boolean allowDuplicateHeaderNames;
+    private final DuplicateHeaderMode duplicateHeaderMode;
 
     private final boolean allowMissingColumnNames;
 
@@ -1319,7 +1334,7 @@ public final class CSVFormat implements Serializable {
         this.trim = builder.trim;
         this.autoFlush = builder.autoFlush;
         this.quotedNullString = builder.quotedNullString;
-        this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
+        this.duplicateHeaderMode = builder.duplicateHeaderMode;
         validate();
     }
 
@@ -1343,14 +1358,14 @@ public final class CSVFormat implements Serializable {
      * @param trim                    TODO Doc me.
      * @param trailingDelimiter       TODO Doc me.
      * @param autoFlush               TODO Doc me.
-     * @param allowDuplicateHeaderNames TODO Doc me.
+     * @param duplicateHeaderMode     the behavior when handling duplicate 
headers
      * @throws IllegalArgumentException if the delimiter is a line break 
character.
      */
     private CSVFormat(final String delimiter, final Character quoteChar, final 
QuoteMode quoteMode, final Character commentStart, final Character escape,
             final boolean ignoreSurroundingSpaces, final boolean 
ignoreEmptyLines, final String recordSeparator, final String nullString,
             final Object[] headerComments, final String[] header, final 
boolean skipHeaderRecord, final boolean allowMissingColumnNames,
             final boolean ignoreHeaderCase, final boolean trim, final boolean 
trailingDelimiter, final boolean autoFlush,
-            final boolean allowDuplicateHeaderNames) {
+            final DuplicateHeaderMode duplicateHeaderMode) {
         this.delimiter = delimiter;
         this.quoteCharacter = quoteChar;
         this.quoteMode = quoteMode;
@@ -1369,7 +1384,7 @@ public final class CSVFormat implements Serializable {
         this.trim = trim;
         this.autoFlush = autoFlush;
         this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
-        this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+        this.duplicateHeaderMode = duplicateHeaderMode;
         validate();
     }
 
@@ -1416,7 +1431,7 @@ public final class CSVFormat implements Serializable {
             return false;
         }
         final CSVFormat other = (CSVFormat) obj;
-        return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && 
allowMissingColumnNames == other.allowMissingColumnNames &&
+        return duplicateHeaderMode == other.duplicateHeaderMode && 
allowMissingColumnNames == other.allowMissingColumnNames &&
                 autoFlush == other.autoFlush && Objects.equals(commentMarker, 
other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
                 Objects.equals(escapeCharacter, other.escapeCharacter) && 
Arrays.equals(header, other.header) &&
                 Arrays.equals(headerComments, other.headerComments) && 
ignoreEmptyLines == other.ignoreEmptyLines &&
@@ -1450,9 +1465,21 @@ public final class CSVFormat implements Serializable {
      *
      * @return whether duplicate header names are allowed
      * @since 1.7
+     * @deprecated Use {@link #getDuplicateHeaderMode()}.
      */
+    @Deprecated
     public boolean getAllowDuplicateHeaderNames() {
-        return allowDuplicateHeaderNames;
+        return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
+    }
+
+    /**
+     * Gets how duplicate headers are handled.
+     *
+     * @return if duplicate header values are allowed, allowed conditionally, 
or disallowed.
+     * @since 1.9.0
+     */
+    public DuplicateHeaderMode getDuplicateHeaderMode() {
+        return duplicateHeaderMode;
     }
 
     /**
@@ -1633,7 +1660,7 @@ public final class CSVFormat implements Serializable {
         int result = 1;
         result = prime * result + Arrays.hashCode(header);
         result = prime * result + Arrays.hashCode(headerComments);
-        return prime * result + Objects.hash(allowDuplicateHeaderNames, 
allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
+        return prime * result + Objects.hash(duplicateHeaderMode, 
allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
                 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, 
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
                 skipHeaderRecord, trailingDelimiter, trim);
     }
@@ -2235,7 +2262,7 @@ public final class CSVFormat implements Serializable {
         }
 
         // validate header
-        if (header != null && !allowDuplicateHeaderNames) {
+        if (header != null && duplicateHeaderMode != 
DuplicateHeaderMode.ALLOW_ALL) {
             final Set<String> dupCheck = new HashSet<>();
             for (final String hdr : header) {
                 if (!dupCheck.add(hdr)) {
@@ -2254,7 +2281,7 @@ public final class CSVFormat implements Serializable {
      */
     @Deprecated
     public CSVFormat withAllowDuplicateHeaderNames() {
-        return builder().setAllowDuplicateHeaderNames(true).build();
+        return 
builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
     }
 
     /**
@@ -2267,7 +2294,8 @@ public final class CSVFormat implements Serializable {
      */
     @Deprecated
     public CSVFormat withAllowDuplicateHeaderNames(final boolean 
allowDuplicateHeaderNames) {
-        return 
builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
+        final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? 
DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
+        return builder().setDuplicateHeaderMode(mode).build();
     }
 
     /**
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java 
b/src/main/java/org/apache/commons/csv/CSVParser.java
index 60ecc73..58cdb14 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -497,12 +497,16 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
                         throw new IllegalArgumentException(
                             "A header name is missing in " + 
Arrays.toString(headerRecord));
                     }
-                    // Note: This will always allow a duplicate header if the 
header is empty
+
                     final boolean containsHeader = header != null && 
hdrMap.containsKey(header);
-                    if (containsHeader && !emptyHeader && 
!this.format.getAllowDuplicateHeaderNames()) {
+                    final DuplicateHeaderMode headerMode = 
this.format.getDuplicateHeaderMode();
+                    final boolean duplicatesAllowed = headerMode == 
DuplicateHeaderMode.ALLOW_ALL;
+                    final boolean emptyDuplicatesAllowed = headerMode == 
DuplicateHeaderMode.ALLOW_EMPTY;
+
+                    if (containsHeader && !duplicatesAllowed && !(emptyHeader 
&& emptyDuplicatesAllowed)) {
                         throw new IllegalArgumentException(
                             String.format(
-                                "The header contains a duplicate name: \"%s\" 
in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
+                                "The header contains a duplicate name: \"%s\" 
in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
                                 header, Arrays.toString(headerRecord)));
                     }
                     if (header != null) {
diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java 
b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
new file mode 100644
index 0000000..e623ada
--- /dev/null
+++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+/**
+ * Determines how duplicate header fields should be handled
+ * if {@link CSVFormat#withHeader(String...)} is not null.
+ *
+ * @since 1.9.0
+ */
+public enum DuplicateHeaderMode {
+
+    /**
+     * Allows all duplicate headers.
+     */
+    ALLOW_ALL,
+
+    /**
+     * Allows duplicate headers only if they're empty strings or null.
+     */
+    ALLOW_EMPTY,
+
+    /**
+     * Disallows duplicate headers entirely.
+     */
+    DISALLOW
+}
diff --git a/src/site/resources/checkstyle/checkstyle-suppressions.xml 
b/src/site/resources/checkstyle/checkstyle-suppressions.xml
index 402525f..abff74c 100644
--- a/src/site/resources/checkstyle/checkstyle-suppressions.xml
+++ b/src/site/resources/checkstyle/checkstyle-suppressions.xml
@@ -19,5 +19,5 @@
     "-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
     "https://checkstyle.org/dtds/suppressions_1_2.dtd";>
 <suppressions>
-  <suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
+  <suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
 </suppressions>
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java 
b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index 682764f..f7c32bd 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -260,6 +260,10 @@ public class CSVFormatTest {
                            final Object a = method.invoke(CSVFormat.DEFAULT, 
QuoteMode.MINIMAL);
                            final Object b = method.invoke(CSVFormat.DEFAULT, 
QuoteMode.ALL);
                            assertNotEquals(name, type, a, b);
+                       } else if 
("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
+                           final Object a = method.invoke(CSVFormat.DEFAULT, 
new Object[] {DuplicateHeaderMode.ALLOW_ALL});
+                           final Object b = method.invoke(CSVFormat.DEFAULT, 
new Object[] {DuplicateHeaderMode.DISALLOW});
+                           assertNotEquals(name, type, a, b);
                        } else if ("java.lang.Object[]".equals(type)){
                            final Object a = method.invoke(CSVFormat.DEFAULT, 
new Object[] {new Object[] {null, null}});
                            final Object b = method.invoke(CSVFormat.DEFAULT, 
new Object[] {new Object[] {new Object(), new Object()}});
@@ -1296,6 +1300,15 @@ public class CSVFormatTest {
 
 
     @Test
+    public void testWithEmptyDuplicates() {
+        final CSVFormat formatWithEmptyDuplicates =
+            
CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build();
+
+        assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, 
formatWithEmptyDuplicates.getDuplicateHeaderMode());
+        assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames());
+    }
+
+    @Test
     public void testWithEscapeCRThrowsExceptions() {
         assertThrows(IllegalArgumentException.class, () -> 
CSVFormat.DEFAULT.withEscape(CR));
     }
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java
new file mode 100644
index 0000000..bbbb262
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv.issues;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.DuplicateHeaderMode;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+/**
+ * When {@link CSVFormat#withHeader(String...)} is not null; duplicate headers
+ * with empty strings should not be allowed.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/CSV-264";>Jira Ticker</a>
+ */
+public class JiraCsv264Test {
+
+    private static final String CSV_STRING = "\"\",\"B\",\"\"\n" +
+                                             "\"1\",\"2\",\"3\"\n" +
+                                             "\"4\",\"5\",\"6\"";
+
+    /**
+     * A CSV file with a random gap in the middle.
+     */
+    private static final String CSV_STRING_GAP = 
"\"A\",\"B\",\"\",\"\",\"E\"\n" +
+                                                 
"\"1\",\"2\",\"\",\"\",\"5\"\n" +
+                                                 
"\"6\",\"7\",\"\",\"\",\"10\"";
+
+    @Test
+    public void testJiraCsv264() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
+            .setAllowMissingColumnNames(true)
+            .build();
+
+        try (StringReader reader = new StringReader(CSV_STRING)) {
+            assertThrows(IllegalArgumentException.class, () -> 
csvFormat.parse(reader));
+        }
+    }
+
+    @Test
+    public void testJiraCsv264WithGapAllowEmpty() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
+            .setAllowMissingColumnNames(true)
+            .build();
+
+        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
+            csvFormat.parse(reader);
+        }
+    }
+
+    @Test
+    public void testJiraCsv264WithGapDisallow() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
+            .setAllowMissingColumnNames(true)
+            .build();
+
+        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
+            assertThrows(IllegalArgumentException.class, () -> 
csvFormat.parse(reader));
+        }
+    }
+}

Reply via email to