This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push: new e100d623 Don't use deprecated code in examples new b63a788e Merge branch 'master' of https://github.com/apache/commons-csv.git e100d623 is described below commit e100d623682c411e242bea8fe0b3176fbd0ddcc0 Author: Gary D. Gregory <garydgreg...@gmail.com> AuthorDate: Sat Jan 4 08:46:52 2025 -0500 Don't use deprecated code in examples --- src/site/xdoc/user-guide.xml | 34 ++++---- .../java/org/apache/commons/csv/UserGuideTest.java | 94 ++++++++++++++++++++++ src/test/java/org/apache/commons/csv/Utils.java | 21 ++++- 3 files changed, 133 insertions(+), 16 deletions(-) diff --git a/src/site/xdoc/user-guide.xml b/src/site/xdoc/user-guide.xml index 3a433fac..5995879b 100644 --- a/src/site/xdoc/user-guide.xml +++ b/src/site/xdoc/user-guide.xml @@ -70,39 +70,45 @@ for (CSVRecord record : records) { <a href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> for example: </p> - <source>final URL url = ...; -try (final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); - final CSVParser parser = CSVFormat.EXCEL.builder() - .setHeader() - .build() - .parse(reader)) { + <source> +try (final Reader reader = new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), "UTF-8"); + final CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .get() + .parse(reader)) { for (final CSVRecord record : parser) { - final String string = record.get("SomeColumn"); - ... + final String string = record.get("ColumnA"); + // ... } } </source> <p> You might find it handy to create something like this: </p> - <source>/** + <source> +/** * Creates a reader capable of handling BOMs. + * + * @param path The path to read. + * @return a new InputStreamReader for UTF-8 bytes. + * @throws IOException if an I/O error occurs. 
*/ -public InputStreamReader newReader(final InputStream inputStream) { - return new InputStreamReader(new BOMInputStream(inputStream), StandardCharsets.UTF_8); +public InputStreamReader newReader(final Path path) throws IOException { + return new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), StandardCharsets.UTF_8); } </source> </subsection> </section> - <section name="Working with headers"> - Apache Commons CSV provides several ways to access record values. The simplest way is to access values by their index in the record. However, columns in CSV files often have a name, for example: ID, CustomerNo, Birthday, etc. The CSVFormat class provides an API for specifying these <i>header</i> names and CSVRecord on the other hand has methods to access values by their corresponding header name. - <subsection name="Accessing column values by index"> To access a record value by index, no special configuration of the CSVFormat is necessary: <source>Reader in = new FileReader("path/to/file.csv"); diff --git a/src/test/java/org/apache/commons/csv/UserGuideTest.java b/src/test/java/org/apache/commons/csv/UserGuideTest.java new file mode 100644 index 00000000..27001b1a --- /dev/null +++ b/src/test/java/org/apache/commons/csv/UserGuideTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.input.BOMInputStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Tests for the user guide. + */ +public class UserGuideTest { + + @TempDir + Path tempDir; + + /** + * Creates a reader capable of handling BOMs. + * + * @param path The path to read. + * @return a new InputStreamReader for UTF-8 bytes. + * @throws IOException if an I/O error occurs. + */ + public InputStreamReader newReader(final Path path) throws IOException { + return new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), StandardCharsets.UTF_8); + } + + @Test + public void testBomFull() throws UnsupportedEncodingException, IOException { + final Path path = tempDir.resolve("test1.csv"); + Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); + // @formatter:off + try (final Reader reader = new InputStreamReader(BOMInputStream.builder() + .setPath(path) + .get(), "UTF-8"); + final CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .get() + .parse(reader)) { + // @formatter:on + for (final CSVRecord record : parser) { + final String string = record.get("ColumnA"); + assertEquals("A", string); + } + } + } + + @Test + public void testBomUtil() throws UnsupportedEncodingException, IOException { + final Path path = tempDir.resolve("test2.csv"); + Files.copy(Utils.createUtf8Input("ColumnA, ColumnB, ColumnC\r\nA, B, C\r\n".getBytes(StandardCharsets.UTF_8), true), path); + try (final Reader
reader = newReader(path); + // @formatter:off + final CSVParser parser = CSVFormat.EXCEL.builder() + .setHeader() + .get() + .parse(reader)) { + // @formatter:on + for (final CSVRecord record : parser) { + final String string = record.get("ColumnA"); + assertEquals("A", string); + } + } + } + +} diff --git a/src/test/java/org/apache/commons/csv/Utils.java b/src/test/java/org/apache/commons/csv/Utils.java index ca1aa659..c99b77ac 100644 --- a/src/test/java/org/apache/commons/csv/Utils.java +++ b/src/test/java/org/apache/commons/csv/Utils.java @@ -22,6 +22,8 @@ package org.apache.commons.csv; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.ByteArrayInputStream; +import java.io.InputStream; import java.util.List; /** @@ -32,9 +34,9 @@ final class Utils { /** * Checks if the 2d array has the same contents as the list of records. * - * @param message the message to be displayed + * @param message the message to be displayed * @param expected the 2d array of expected results - * @param actual the List of {@link CSVRecord} entries, each containing an array of values + * @param actual the List of {@link CSVRecord} entries, each containing an array of values */ public static void compare(final String message, final String[][] expected, final List<CSVRecord> actual) { final int expectedLength = expected.length; @@ -44,6 +46,21 @@ final class Utils { } } + /** + * Creates an input stream, with or without a BOM. + */ + static InputStream createUtf8Input(final byte[] baseData, final boolean addBom) { + byte[] data = baseData; + if (addBom) { + data = new byte[baseData.length + 3]; + data[0] = (byte) 0xEF; + data[1] = (byte) 0xBB; + data[2] = (byte) 0xBF; + System.arraycopy(baseData, 0, data, 3, baseData.length); + } + return new ByteArrayInputStream(data); + } + private Utils() { } }