This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 240cbf4f83 Move examples in arrow-csv to docstrings, polish up docs 
(#9001)
240cbf4f83 is described below

commit 240cbf4f838387445b0209db4b14dbb277b05a12
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Dec 19 12:52:31 2025 -0500

    Move examples in arrow-csv to docstrings, polish up docs (#9001)
    
    # Which issue does this PR close?
    
    
    # Rationale for this change
    
    While reviewing @xanderbailey's PR in
    https://github.com/apache/arrow-rs/pull/8960, I found that there are
    examples for arrow-csv, but they are hard to find. Also, each example adds
    an extra binary and thus slows down CI and tests. For example, the
    `whitespace_handling` example makes a new 2.9MB binary:
    
    ```shell
    cargo run -p arrow-csv --example whitespace_handling
    ...
    du -s -h target/debug/examples/whitespace_handling
    2.9M    target/debug/examples/whitespace_handling
    ```
    
    Let's consolidate the examples to make them easier to find
    
    
    # What changes are included in this PR?
    
    1. Consolidate the examples
    2. Improve other CSV docs
    
    # Are these changes tested?
    
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    
    # Are there any user-facing changes?
    
    Docs only, no functional changes
---
 arrow-csv/examples/README.md              | 21 --------
 arrow-csv/examples/csv_calculation.rs     | 56 --------------------
 arrow-csv/examples/whitespace_handling.rs | 86 -------------------------------
 arrow-csv/src/lib.rs                      |  4 +-
 arrow-csv/src/reader/mod.rs               | 58 ++++++++++++++++++---
 arrow-csv/src/writer.rs                   | 47 ++++++-----------
 6 files changed, 71 insertions(+), 201 deletions(-)

diff --git a/arrow-csv/examples/README.md b/arrow-csv/examples/README.md
deleted file mode 100644
index 340413e76d..0000000000
--- a/arrow-csv/examples/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Examples
-- [`csv_calculation.rs`](csv_calculation.rs): performs a simple calculation 
using the CSV reader
\ No newline at end of file
diff --git a/arrow-csv/examples/csv_calculation.rs 
b/arrow-csv/examples/csv_calculation.rs
deleted file mode 100644
index 6ce963e2b0..0000000000
--- a/arrow-csv/examples/csv_calculation.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_array::cast::AsArray;
-use arrow_array::types::Int16Type;
-use arrow_csv::ReaderBuilder;
-
-use arrow_schema::{DataType, Field, Schema};
-use std::fs::File;
-use std::sync::Arc;
-
-fn main() {
-    // read csv from file
-    let file = File::open("arrow-csv/test/data/example.csv").unwrap();
-    let csv_schema = Schema::new(vec![
-        Field::new("c1", DataType::Int16, true),
-        Field::new("c2", DataType::Float32, true),
-        Field::new("c3", DataType::Utf8, true),
-        Field::new("c4", DataType::Boolean, true),
-    ]);
-    let mut reader = ReaderBuilder::new(Arc::new(csv_schema))
-        .with_header(true)
-        .build(file)
-        .unwrap();
-
-    match reader.next() {
-        Some(r) => match r {
-            Ok(r) => {
-                // get the column(0) max value
-                let col = r.column(0).as_primitive::<Int16Type>();
-                let max = col.iter().max().flatten();
-                println!("max value column(0): {max:?}")
-            }
-            Err(e) => {
-                println!("{e:?}");
-            }
-        },
-        None => {
-            println!("csv is empty");
-        }
-    }
-}
diff --git a/arrow-csv/examples/whitespace_handling.rs 
b/arrow-csv/examples/whitespace_handling.rs
deleted file mode 100644
index 77bb1a8a8c..0000000000
--- a/arrow-csv/examples/whitespace_handling.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_array::*;
-use arrow_csv::WriterBuilder;
-use arrow_schema::*;
-use std::sync::Arc;
-
-fn main() {
-    // Create a sample schema with string columns
-    let schema = Schema::new(vec![
-        Field::new("name", DataType::Utf8, false),
-        Field::new("city", DataType::Utf8, false),
-        Field::new("country", DataType::Utf8, false),
-    ]);
-
-    // Create sample data with leading and trailing whitespace
-    let name = StringArray::from(vec![
-        "  John Doe  ",
-        "  Jane Smith",
-        "Bob Johnson  ",
-        "Alice Williams",
-    ]);
-    let city = StringArray::from(vec![
-        "  New York  ",
-        "Los Angeles  ",
-        "  Chicago",
-        "Houston",
-    ]);
-    let country = StringArray::from(vec!["  USA  ", "  USA  ", "  USA  ", "  
USA  "]);
-
-    let batch = RecordBatch::try_new(
-        Arc::new(schema),
-        vec![Arc::new(name), Arc::new(city), Arc::new(country)],
-    )
-    .unwrap();
-
-    println!("Original CSV (with whitespace):");
-    let mut buf = Vec::new();
-    let mut writer = WriterBuilder::new().build(&mut buf);
-    writer.write(&batch).unwrap();
-    drop(writer);
-    println!("{}", String::from_utf8(buf).unwrap());
-
-    println!("\nCSV with ignore_leading_whitespace:");
-    let mut buf = Vec::new();
-    let mut writer = WriterBuilder::new()
-        .with_ignore_leading_whitespace(true)
-        .build(&mut buf);
-    writer.write(&batch).unwrap();
-    drop(writer);
-    println!("{}", String::from_utf8(buf).unwrap());
-
-    println!("\nCSV with ignore_trailing_whitespace:");
-    let mut buf = Vec::new();
-    let mut writer = WriterBuilder::new()
-        .with_ignore_trailing_whitespace(true)
-        .build(&mut buf);
-    writer.write(&batch).unwrap();
-    drop(writer);
-    println!("{}", String::from_utf8(buf).unwrap());
-
-    println!("\nCSV with both ignore_leading_whitespace and 
ignore_trailing_whitespace:");
-    let mut buf = Vec::new();
-    let mut writer = WriterBuilder::new()
-        .with_ignore_leading_whitespace(true)
-        .with_ignore_trailing_whitespace(true)
-        .build(&mut buf);
-    writer.write(&batch).unwrap();
-    drop(writer);
-    println!("{}", String::from_utf8(buf).unwrap());
-}
diff --git a/arrow-csv/src/lib.rs b/arrow-csv/src/lib.rs
index 54c4fc03f5..4c4b040981 100644
--- a/arrow-csv/src/lib.rs
+++ b/arrow-csv/src/lib.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Transfer data between the Arrow memory format and CSV (comma-separated 
values).
+//! Transfer data between the [Apache Arrow] memory format and CSV 
(comma-separated values).
+//!
+//! [Apache Arrow]: https://arrow.apache.org/
 
 #![doc(
     html_logo_url = 
"https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index 0a72b57e85..e26072fea9 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! CSV Reader
+//! CSV Reading: [`Reader`] and [`ReaderBuilder`]
 //!
 //! # Basic Usage
 //!
@@ -42,6 +42,46 @@
 //! let batch = csv.next().unwrap().unwrap();
 //! ```
 //!
+//! # Example: Numeric calculations on CSV
+//! This code finds the maximum value in column 0 of a CSV file containing
+//! ```csv
+//! c1,c2,c3,c4
+//! 1,1.1,"hong kong",true
+//! 3,323.12,"XiAn",false
+//! 10,131323.12,"cheng du",false
+//! ```
+//!
+//! ```
+//! # use arrow_array::cast::AsArray;
+//! # use arrow_array::types::Int16Type;
+//! # use arrow_csv::ReaderBuilder;
+//! # use arrow_schema::{DataType, Field, Schema};
+//! # use std::fs::File;
+//! # use std::sync::Arc;
+//! // Open the example file
+//! let file = File::open("test/data/example.csv").unwrap();
+//! let csv_schema = Schema::new(vec![
+//!     Field::new("c1", DataType::Int16, true),
+//!     Field::new("c2", DataType::Float32, true),
+//!     Field::new("c3", DataType::Utf8, true),
+//!     Field::new("c4", DataType::Boolean, true),
+//! ]);
+//! let mut reader = ReaderBuilder::new(Arc::new(csv_schema))
+//!     .with_header(true)
+//!     .build(file)
+//!     .unwrap();
+//! // find the maximum value in column 0 across all batches
+//! let mut max_c0 = 0;
+//! while let Some(r) = reader.next() {
+//!   let r = r.unwrap(); // handle error
+//!   // get the max value in column(0) for this batch
+//!   let col = r.column(0).as_primitive::<Int16Type>();
+//!   let batch_max = col.iter().max().flatten().unwrap_or_default();
+//!   max_c0 = max_c0.max(batch_max);
+//! }
+//! assert_eq!(max_c0, 10);
+//!```
+//!
 //! # Async Usage
 //!
 //! The lower-level [`Decoder`] can be integrated with various forms of async 
data streams,
@@ -441,13 +481,18 @@ pub fn infer_schema_from_files(
 type Bounds = Option<(usize, usize)>;
 
 /// CSV file reader using [`std::io::BufReader`]
+///
+/// See [`ReaderBuilder`] to construct a CSV reader with options and  the
+/// [module-level documentation](crate::reader) for more details and examples
 pub type Reader<R> = BufReader<StdBufReader<R>>;
 
-/// CSV file reader
+/// CSV file reader implementation. See [`Reader`] for usage
+///
+/// Despite having the same name as [`std::io::BufReader`], this structure does
+/// not buffer reads itself
 pub struct BufReader<R> {
     /// File reader
     reader: R,
-
     /// The decoder
     decoder: Decoder,
 }
@@ -1053,7 +1098,7 @@ fn build_boolean_array(
         .map(|e| Arc::new(e) as ArrayRef)
 }
 
-/// CSV file reader builder
+/// Builder for CSV [`Reader`]s
 #[derive(Debug)]
 pub struct ReaderBuilder {
     /// Schema of the CSV file
@@ -1071,9 +1116,10 @@ pub struct ReaderBuilder {
 }
 
 impl ReaderBuilder {
-    /// Create a new builder for configuring CSV parsing options.
+    /// Create a new builder for configuring [`Reader`] CSV parsing options.
     ///
-    /// To convert a builder into a reader, call `ReaderBuilder::build`
+    /// To convert a builder into a reader, call [`ReaderBuilder::build`]. See
+    /// the [module-level documentation](crate::reader) for more details and 
examples.
     ///
     /// # Example
     ///
diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs
index fcf30a80dc..c38d1cdec3 100644
--- a/arrow-csv/src/writer.rs
+++ b/arrow-csv/src/writer.rs
@@ -15,13 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! CSV Writer
+//! CSV Writing: [`Writer`] and [`WriterBuilder`]
 //!
 //! This CSV writer allows Arrow data (in record batches) to be written as CSV 
files.
 //! The writer does not support writing `ListArray` and `StructArray`.
 //!
 //! # Example
-//!
 //! ```
 //! # use arrow_array::*;
 //! # use arrow_array::types::*;
@@ -75,14 +74,13 @@
 //! - `DataType::LargeUtf8`
 //! - `DataType::Utf8View`
 //!
-//! ## Example with whitespace handling
+//! ## Example: Use [`WriterBuilder`] to control whitespace handling
 //!
 //! ```
 //! # use arrow_array::*;
 //! # use arrow_csv::WriterBuilder;
 //! # use arrow_schema::*;
 //! # use std::sync::Arc;
-//!
 //! let schema = Schema::new(vec![
 //!     Field::new("name", DataType::Utf8, false),
 //!     Field::new("comment", DataType::Utf8, false),
@@ -105,17 +103,6 @@
 //! )
 //! .unwrap();
 //!
-//! // Default behavior (no trimming)
-//! let mut output = Vec::new();
-//! WriterBuilder::new()
-//!     .build(&mut output)
-//!     .write(&batch)
-//!     .unwrap();
-//! assert_eq!(
-//!     String::from_utf8(output).unwrap(),
-//!     "name,comment\n  Alice  ,  Great job!  \nBob,Well done\n  
Charlie,Excellent  \n"
-//! );
-//!
 //! // Trim both leading and trailing whitespace
 //! let mut output = Vec::new();
 //! WriterBuilder::new()
@@ -126,19 +113,11 @@
 //!     .unwrap();
 //! assert_eq!(
 //!     String::from_utf8(output).unwrap(),
-//!     "name,comment\nAlice,Great job!\nBob,Well done\nCharlie,Excellent\n"
-//! );
-//!
-//! // Trim only leading whitespace
-//! let mut output = Vec::new();
-//! WriterBuilder::new()
-//!     .with_ignore_leading_whitespace(true)
-//!     .build(&mut output)
-//!     .write(&batch)
-//!     .unwrap();
-//! assert_eq!(
-//!     String::from_utf8(output).unwrap(),
-//!     "name,comment\nAlice  ,Great job!  \nBob,Well done\nCharlie,Excellent  
\n"
+//!     "\
+//! name,comment\n\
+//! Alice,Great job!\n\
+//! Bob,Well done\n\
+//! Charlie,Excellent\n"
 //! );
 //! ```
 //!
@@ -220,6 +199,8 @@ const DEFAULT_NULL_VALUE: &str = "";
 pub use csv::QuoteStyle;
 
 /// A CSV writer
+///
+/// See the [module documentation](crate::writer) for examples.
 #[derive(Debug)]
 pub struct Writer<W: Write> {
     /// The object to write to
@@ -248,12 +229,15 @@ pub struct Writer<W: Write> {
 
 impl<W: Write> Writer<W> {
     /// Create a new CsvWriter from a writable object, with default options
+    ///
+    /// See [`WriterBuilder`] for configuration options, and the [module
+    /// documentation](crate::writer) for examples.
     pub fn new(writer: W) -> Self {
         let delimiter = b',';
         WriterBuilder::new().with_delimiter(delimiter).build(writer)
     }
 
-    /// Write a RecordBatch to a writable object
+    /// Write a RecordBatch to the underlying writer
     pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
         let num_columns = batch.num_columns();
         if self.beginning {
@@ -418,9 +402,10 @@ impl Default for WriterBuilder {
 }
 
 impl WriterBuilder {
-    /// Create a new builder for configuring CSV writing options.
+    /// Create a new builder for configuring CSV [`Writer`] options.
     ///
-    /// To convert a builder into a writer, call `WriterBuilder::build`
+    /// To convert a builder into a writer, call [`WriterBuilder::build`]. See
+    /// the [module documentation](crate::writer) for more examples.
     ///
     /// # Example
     ///

Reply via email to