This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 245f067673 GH-48680: [GLib][Ruby] Add CSVWriter (#48681)
245f067673 is described below

commit 245f0676739664be13e5507480cc9d4f0c025cb3
Author: Sten Larsson <[email protected]>
AuthorDate: Thu Jan 1 04:27:03 2026 +0100

    GH-48680: [GLib][Ruby] Add CSVWriter (#48681)
    
    ### Rationale for this change
    
    Using Arrow to write CSVs could give much better performance than using
    Ruby, but the CSV writer is not available in the GLib and Ruby bindings.
    
    ### What changes are included in this PR?
    
    This adds `CSVWriter` and `CSVWriteOptions` classes to the GLib wrapper,
    along with Ruby-side conveniences for `CSVWriteOptions` in Red Arrow.
    
    ### Are these changes tested?
    
    Yes, with Ruby unit tests.
    
    ### Are there any user-facing changes?
    
    Yes, new classes.
    * GitHub Issue: #48680
    
    Lead-authored-by: Sten Larsson <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/writer.cpp                  | 330 ++++++++++++++++++++++++++
 c_glib/arrow-glib/writer.h                    |  45 ++++
 c_glib/arrow-glib/writer.hpp                  |  10 +
 c_glib/test/test-csv-writer.rb                | 190 +++++++++++++++
 ruby/red-arrow/lib/arrow/csv-write-options.rb |  53 +++++
 ruby/red-arrow/lib/arrow/libraries.rb         |   1 +
 ruby/red-arrow/test/test-csv-writer.rb        |  53 +++++
 7 files changed, 682 insertions(+)

diff --git a/c_glib/arrow-glib/writer.cpp b/c_glib/arrow-glib/writer.cpp
index 08af1c7976..4228b60910 100644
--- a/c_glib/arrow-glib/writer.cpp
+++ b/c_glib/arrow-glib/writer.cpp
@@ -18,6 +18,7 @@
  */
 
 #include <arrow-glib/array.hpp>
+#include <arrow-glib/enums.h>
 #include <arrow-glib/error.hpp>
 #include <arrow-glib/record-batch.hpp>
 #include <arrow-glib/schema.hpp>
@@ -300,6 +301,320 @@ garrow_record_batch_file_writer_new(GArrowOutputStream 
*sink,
   }
 }
 
+struct GArrowCSVWriteOptionsPrivate
+{
+  arrow::csv::WriteOptions write_options;
+};
+
+enum {
+  PROP_CSV_WRITE_OPTIONS_INCLUDE_HEADER = 1,
+  PROP_CSV_WRITE_OPTIONS_BATCH_SIZE,
+  PROP_CSV_WRITE_OPTIONS_DELIMITER,
+  PROP_CSV_WRITE_OPTIONS_NULL_STRING,
+  PROP_CSV_WRITE_OPTIONS_EOL,
+  PROP_CSV_WRITE_OPTIONS_QUOTING_STYLE,
+  PROP_CSV_WRITE_OPTIONS_QUOTING_HEADER,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowCSVWriteOptions, garrow_csv_write_options, 
G_TYPE_OBJECT)
+
+#define GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(object)                           
          \
+  static_cast<GArrowCSVWriteOptionsPrivate *>(                                 
          \
+    
garrow_csv_write_options_get_instance_private(GARROW_CSV_WRITE_OPTIONS(object)))
+
+static void
+garrow_csv_write_options_finalize(GObject *object)
+{
+  auto priv = GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(object);
+
+  priv->write_options.~WriteOptions();
+
+  G_OBJECT_CLASS(garrow_csv_write_options_parent_class)->finalize(object);
+}
+
+/*
+ * GObject set_property implementation for #GArrowCSVWriteOptions.
+ *
+ * Copies the value carried by @value into the matching field of the wrapped
+ * arrow::csv::WriteOptions, selected by @prop_id (one of the
+ * PROP_CSV_WRITE_OPTIONS_* IDs). Unknown IDs only emit the standard GObject
+ * invalid-property warning.
+ *
+ * For the string properties ("null-string" and "eol") a %NULL value resets
+ * the option to the arrow::csv::WriteOptions::Defaults() value, because
+ * std::string must not be assigned from a null pointer.
+ */
+static void
+garrow_csv_write_options_set_property(GObject *object,
+                                      guint prop_id,
+                                      const GValue *value,
+                                      GParamSpec *pspec)
+{
+  auto priv = GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CSV_WRITE_OPTIONS_INCLUDE_HEADER:
+    priv->write_options.include_header = g_value_get_boolean(value);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_BATCH_SIZE:
+    priv->write_options.batch_size = g_value_get_int(value);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_DELIMITER:
+    priv->write_options.delimiter = g_value_get_schar(value);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_NULL_STRING:
+    {
+      // g_value_get_string() returns NULL when the property is set to NULL.
+      auto null_string = g_value_get_string(value);
+      if (null_string) {
+        priv->write_options.null_string = null_string;
+      } else {
+        priv->write_options.null_string =
+          arrow::csv::WriteOptions::Defaults().null_string;
+      }
+    }
+    break;
+  case PROP_CSV_WRITE_OPTIONS_EOL:
+    {
+      // Same NULL handling as "null-string": fall back to the default EOL.
+      auto eol = g_value_get_string(value);
+      if (eol) {
+        priv->write_options.eol = eol;
+      } else {
+        priv->write_options.eol = arrow::csv::WriteOptions::Defaults().eol;
+      }
+    }
+    break;
+  case PROP_CSV_WRITE_OPTIONS_QUOTING_STYLE:
+    priv->write_options.quoting_style =
+      static_cast<arrow::csv::QuotingStyle>(g_value_get_enum(value));
+    break;
+  case PROP_CSV_WRITE_OPTIONS_QUOTING_HEADER:
+    priv->write_options.quoting_header =
+      static_cast<arrow::csv::QuotingStyle>(g_value_get_enum(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_csv_write_options_get_property(GObject *object,
+                                      guint prop_id,
+                                      GValue *value,
+                                      GParamSpec *pspec)
+{
+  auto priv = GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(object);
+
+  switch (prop_id) {
+  case PROP_CSV_WRITE_OPTIONS_INCLUDE_HEADER:
+    g_value_set_boolean(value, priv->write_options.include_header);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_BATCH_SIZE:
+    g_value_set_int(value, priv->write_options.batch_size);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_DELIMITER:
+    g_value_set_schar(value, priv->write_options.delimiter);
+    break;
+  case PROP_CSV_WRITE_OPTIONS_NULL_STRING:
+    g_value_set_string(value, priv->write_options.null_string.c_str());
+    break;
+  case PROP_CSV_WRITE_OPTIONS_EOL:
+    g_value_set_string(value, priv->write_options.eol.c_str());
+    break;
+  case PROP_CSV_WRITE_OPTIONS_QUOTING_STYLE:
+    g_value_set_enum(
+      value,
+      static_cast<GArrowCSVQuotingStyle>(priv->write_options.quoting_style));
+    break;
+  case PROP_CSV_WRITE_OPTIONS_QUOTING_HEADER:
+    g_value_set_enum(
+      value,
+      static_cast<GArrowCSVQuotingStyle>(priv->write_options.quoting_header));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_csv_write_options_init(GArrowCSVWriteOptions *object)
+{
+  auto priv = GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(object);
+  new (&priv->write_options) arrow::csv::WriteOptions;
+  priv->write_options = arrow::csv::WriteOptions::Defaults();
+}
+
+static void
+garrow_csv_write_options_class_init(GArrowCSVWriteOptionsClass *klass)
+{
+  GParamSpec *spec;
+
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->finalize = garrow_csv_write_options_finalize;
+  gobject_class->set_property = garrow_csv_write_options_set_property;
+  gobject_class->get_property = garrow_csv_write_options_get_property;
+
+  auto write_options = arrow::csv::WriteOptions::Defaults();
+
+  /**
+   * GArrowCSVWriteOptions:include-header:
+   *
+   * Whether to write an initial header line with column names.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boolean("include-header",
+                              "Include header",
+                              "Whether to write an initial header line with 
column names",
+                              write_options.include_header,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_CSV_WRITE_OPTIONS_INCLUDE_HEADER,
+                                  spec);
+
+  /**
+   * GArrowCSVWriteOptions:batch-size:
+   *
+   * Maximum number of rows processed at a time.
+   *
+   * The CSV writer converts and writes data in batches of N rows. This number 
can impact
+   * performance.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_int("batch-size",
+                          "Batch size",
+                          "Maximum number of rows processed at a time",
+                          1,
+                          G_MAXINT32,
+                          write_options.batch_size,
+                          static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, 
PROP_CSV_WRITE_OPTIONS_BATCH_SIZE, spec);
+
+  /**
+   * GArrowCSVWriteOptions:delimiter:
+   *
+   * Field delimiter.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_char("delimiter",
+                           "Delimiter",
+                           "Field delimiter",
+                           0,
+                           G_MAXINT8,
+                           write_options.delimiter,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, 
PROP_CSV_WRITE_OPTIONS_DELIMITER, spec);
+
+  /**
+   * GArrowCSVWriteOptions:null-string:
+   *
+   * The string to write for null values. Quotes are not allowed in this 
string.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_string("null-string",
+                             "Null string",
+                             "The string to write for null values",
+                             write_options.null_string.c_str(),
+                             static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_CSV_WRITE_OPTIONS_NULL_STRING,
+                                  spec);
+
+  /**
+   * GArrowCSVWriteOptions:eol:
+   *
+   * The end-of-line string to use for ending rows (e.g. "\n" or "\r\n").
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_string("eol",
+                             "EOL",
+                             "The end of line character to use for ending 
rows",
+                             write_options.eol.c_str(),
+                             static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_CSV_WRITE_OPTIONS_EOL, 
spec);
+
+  /**
+   * GArrowCSVWriteOptions:quoting-style:
+   *
+   * Quoting style.
+   *
+   * Since: 23.0.0
+   */
+  spec =
+    g_param_spec_enum("quoting-style",
+                      "Quoting style",
+                      "Quoting style",
+                      GARROW_TYPE_CSV_QUOTING_STYLE,
+                      
static_cast<GArrowCSVQuotingStyle>(write_options.quoting_style),
+                      static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_CSV_WRITE_OPTIONS_QUOTING_STYLE,
+                                  spec);
+
+  /**
+   * GArrowCSVWriteOptions:quoting-header:
+   *
+   * Quoting style of header.
+   *
+   * Note that #GARROW_CSV_QUOTING_STYLE_NEEDED and 
#GARROW_CSV_QUOTING_STYLE_ALL_VALID
+   * have the same effect of quoting all column names.
+   *
+   * Since: 23.0.0
+   */
+  spec =
+    g_param_spec_enum("quoting-header",
+                      "Quoting header",
+                      "Quoting style of header",
+                      GARROW_TYPE_CSV_QUOTING_STYLE,
+                      
static_cast<GArrowCSVQuotingStyle>(write_options.quoting_header),
+                      static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_CSV_WRITE_OPTIONS_QUOTING_HEADER,
+                                  spec);
+}
+
+/**
+ * garrow_csv_write_options_new:
+ *
+ * Returns: A newly created #GArrowCSVWriteOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowCSVWriteOptions *
+garrow_csv_write_options_new(void)
+{
+  auto csv_write_options = g_object_new(GARROW_TYPE_CSV_WRITE_OPTIONS, 
nullptr);
+  return GARROW_CSV_WRITE_OPTIONS(csv_write_options);
+}
+
+G_DEFINE_TYPE(GArrowCSVWriter, garrow_csv_writer, 
GARROW_TYPE_RECORD_BATCH_WRITER);
+
+static void
+garrow_csv_writer_init(GArrowCSVWriter *object)
+{
+}
+
+static void
+garrow_csv_writer_class_init(GArrowCSVWriterClass *klass)
+{
+}
+
+/**
+ * garrow_csv_writer_new:
+ * @sink: The output of the writer.
+ * @schema: The schema of the writer.
+ * @options: (nullable): Options for serialization. If %NULL,
+ *   the `arrow::csv::WriteOptions::Defaults()` values are used.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowCSVWriter
+ *   or %NULL on error.
+ *
+ * Since: 23.0.0
+ */
+GArrowCSVWriter *
+garrow_csv_writer_new(GArrowOutputStream *sink,
+                      GArrowSchema *schema,
+                      GArrowCSVWriteOptions *options,
+                      GError **error)
+{
+  auto arrow_sink = garrow_output_stream_get_raw(sink);
+  auto arrow_schema = garrow_schema_get_raw(schema);
+  arrow::csv::WriteOptions arrow_write_options;
+  if (options) {
+    auto arrow_write_options_ptr = garrow_csv_write_options_get_raw(options);
+    arrow_write_options = *arrow_write_options_ptr;
+  } else {
+    arrow_write_options = arrow::csv::WriteOptions::Defaults();
+  }
+  auto arrow_writer_result =
+    arrow::csv::MakeCSVWriter(arrow_sink, arrow_schema, arrow_write_options);
+  if (garrow::check(error, arrow_writer_result, "[csv-writer][new]")) {
+    auto arrow_writer = *arrow_writer_result;
+    return garrow_csv_writer_new_raw(&arrow_writer);
+  } else {
+    return nullptr;
+  }
+}
+
 G_END_DECLS
 
 GArrowRecordBatchWriter *
@@ -343,3 +658,18 @@ garrow_record_batch_file_writer_new_raw(
                                                  NULL));
   return writer;
 }
+
+GArrowCSVWriter *
+garrow_csv_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> 
*arrow_writer)
+{
+  auto writer = GARROW_CSV_WRITER(
+    g_object_new(GARROW_TYPE_CSV_WRITER, "record-batch-writer", arrow_writer, 
nullptr));
+  return writer;
+}
+
+arrow::csv::WriteOptions *
+garrow_csv_write_options_get_raw(GArrowCSVWriteOptions *options)
+{
+  auto priv = GARROW_CSV_WRITE_OPTIONS_GET_PRIVATE(options);
+  return &priv->write_options;
+}
diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h
index cea8390d90..fc5fe0c2c7 100644
--- a/c_glib/arrow-glib/writer.h
+++ b/c_glib/arrow-glib/writer.h
@@ -94,4 +94,49 @@ garrow_record_batch_file_writer_new(GArrowOutputStream *sink,
                                     GArrowSchema *schema,
                                     GError **error);
 
+/**
+ * GArrowCSVQuotingStyle:
+ * @GARROW_CSV_QUOTING_STYLE_NEEDED: Only enclose values in quotes which need
+ *   them.
+ * @GARROW_CSV_QUOTING_STYLE_ALL_VALID: Enclose all valid values in quotes.
+ * @GARROW_CSV_QUOTING_STYLE_NONE: Do not enclose any values in quotes.
+ *
+ * They correspond to `arrow::csv::QuotingStyle` values.
+ *
+ * Since: 23.0.0
+ */
+typedef enum {
+  GARROW_CSV_QUOTING_STYLE_NEEDED,
+  GARROW_CSV_QUOTING_STYLE_ALL_VALID,
+  GARROW_CSV_QUOTING_STYLE_NONE,
+} GArrowCSVQuotingStyle;
+
+#define GARROW_TYPE_CSV_WRITE_OPTIONS (garrow_csv_write_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(
+  GArrowCSVWriteOptions, garrow_csv_write_options, GARROW, CSV_WRITE_OPTIONS, 
GObject)
+struct _GArrowCSVWriteOptionsClass
+{
+  GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowCSVWriteOptions *
+garrow_csv_write_options_new(void);
+
+#define GARROW_TYPE_CSV_WRITER (garrow_csv_writer_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(
+  GArrowCSVWriter, garrow_csv_writer, GARROW, CSV_WRITER, 
GArrowRecordBatchWriter)
+struct _GArrowCSVWriterClass
+{
+  GArrowRecordBatchWriterClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowCSVWriter *
+garrow_csv_writer_new(GArrowOutputStream *sink,
+                      GArrowSchema *schema,
+                      GArrowCSVWriteOptions *options,
+                      GError **error);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp
index 1d85ac52f8..553d83a382 100644
--- a/c_glib/arrow-glib/writer.hpp
+++ b/c_glib/arrow-glib/writer.hpp
@@ -20,6 +20,8 @@
 #pragma once
 
 #include <arrow/api.h>
+#include <arrow/csv/options.h>
+#include <arrow/csv/writer.h>
 #include <arrow/ipc/api.h>
 #include <arrow/ipc/feather.h>
 
@@ -42,3 +44,11 @@ GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatchFileWriter *
 garrow_record_batch_file_writer_new_raw(
   std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer);
+
+GARROW_AVAILABLE_IN_23_0
+GArrowCSVWriter *
+garrow_csv_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> 
*arrow_writer);
+
+GARROW_AVAILABLE_IN_23_0
+arrow::csv::WriteOptions *
+garrow_csv_write_options_get_raw(GArrowCSVWriteOptions *options);
diff --git a/c_glib/test/test-csv-writer.rb b/c_glib/test/test-csv-writer.rb
new file mode 100644
index 0000000000..2fdbbcc689
--- /dev/null
+++ b/c_glib/test/test-csv-writer.rb
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestCSVWriter < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def test_write_record_batch
+    message_data = ["Start", "Shutdown"]
+    count_data = [2, 9]
+    message_field = Arrow::Field.new("message", Arrow::StringDataType.new)
+    count_field = Arrow::Field.new("count", Arrow::Int64DataType.new)
+    schema = Arrow::Schema.new([message_field, count_field])
+
+    buffer = Arrow::ResizableBuffer.new(0)
+    output = Arrow::BufferOutputStream.new(buffer)
+    begin
+      csv_writer = Arrow::CSVWriter.new(output, schema)
+      begin
+        record_batch = Arrow::RecordBatch.new(schema,
+                                              message_data.size,
+                                              [
+                                                
build_string_array(message_data),
+                                                build_int64_array(count_data),
+                                              ])
+        csv_writer.write_record_batch(record_batch)
+      ensure
+        csv_writer.close
+        assert do
+          csv_writer.closed?
+        end
+      end
+    ensure
+      output.close
+    end
+
+    csv_output = buffer.data.to_s
+    expected = <<~CSV
+      "message","count"
+      "Start",2
+      "Shutdown",9
+    CSV
+    assert_equal(expected, csv_output)
+  end
+
+  def test_write_table
+    message_data = ["Start", "Shutdown", "Reboot"]
+    count_data = [2, 9, 5]
+    message_field = Arrow::Field.new("message", Arrow::StringDataType.new)
+    count_field = Arrow::Field.new("count", Arrow::Int64DataType.new)
+    schema = Arrow::Schema.new([message_field, count_field])
+
+    buffer = Arrow::ResizableBuffer.new(0)
+    output = Arrow::BufferOutputStream.new(buffer)
+    begin
+      csv_writer = Arrow::CSVWriter.new(output, schema)
+      begin
+        table = Arrow::Table.new(schema,
+                                 [
+                                   build_string_array(message_data),
+                                   build_int64_array(count_data),
+                                 ])
+        csv_writer.write_table(table)
+      ensure
+        csv_writer.close
+        assert do
+          csv_writer.closed?
+        end
+      end
+    ensure
+      output.close
+    end
+
+    csv_output = buffer.data.to_s
+    expected = <<~CSV
+      "message","count"
+      "Start",2
+      "Shutdown",9
+      "Reboot",5
+    CSV
+    assert_equal(expected, csv_output)
+  end
+
+
+  sub_test_case("options") do
+    def setup
+      @options = Arrow::CSVWriteOptions.new
+    end
+
+    def test_include_header
+      assert do
+        @options.include_header?
+      end
+      @options.include_header = false
+      assert do
+        not @options.include_header?
+      end
+    end
+
+    def test_batch_size
+      assert_equal(1024, @options.batch_size)
+      @options.batch_size = 2048
+      assert_equal(2048, @options.batch_size)
+    end
+
+    def test_delimiter
+      assert_equal(",".ord, @options.delimiter)
+      @options.delimiter = ";".ord
+      assert_equal(";".ord, @options.delimiter)
+    end
+
+    def test_null_string
+      assert_equal("", @options.null_string)
+      @options.null_string = "NULL"
+      assert_equal("NULL", @options.null_string)
+    end
+
+    def test_eol
+      assert_equal("\n", @options.eol)
+      @options.eol = "\r\n"
+      assert_equal("\r\n", @options.eol)
+    end
+
+    def test_quoting_style
+      assert_equal(Arrow::CSVQuotingStyle::NEEDED, @options.quoting_style)
+      @options.quoting_style = :all_valid
+      assert_equal(Arrow::CSVQuotingStyle::ALL_VALID, @options.quoting_style)
+    end
+
+    def test_quoting_header
+      assert_equal(Arrow::CSVQuotingStyle::NEEDED, @options.quoting_header)
+      @options.quoting_header = :none
+      assert_equal(Arrow::CSVQuotingStyle::NONE, @options.quoting_header)
+    end
+
+    def test_write_with_options
+      message_data = ["Start", nil, "Reboot"]
+      count_data = [2, 9, 5]
+      message_field = Arrow::Field.new("message", Arrow::StringDataType.new)
+      count_field = Arrow::Field.new("count", Arrow::Int64DataType.new)
+      schema = Arrow::Schema.new([message_field, count_field])
+
+      options = Arrow::CSVWriteOptions.new
+      options.include_header = false
+      options.delimiter = ";".ord
+      options.quoting_style = Arrow::CSVQuotingStyle::NONE
+      options.null_string = "NULL"
+
+      buffer = Arrow::ResizableBuffer.new(0)
+      output = Arrow::BufferOutputStream.new(buffer)
+      begin
+        csv_writer = Arrow::CSVWriter.new(output, schema, options)
+        begin
+          record_batch = Arrow::RecordBatch.new(schema,
+                                                message_data.size,
+                                                [
+                                                  
build_string_array(message_data),
+                                                  
build_int64_array(count_data),
+                                                ])
+          csv_writer.write_record_batch(record_batch)
+        ensure
+          csv_writer.close
+        end
+      ensure
+        output.close
+      end
+
+      csv_output = buffer.data.to_s
+      expected = <<~CSV
+        Start;2
+        NULL;9
+        Reboot;5
+      CSV
+      assert_equal(expected, csv_output)
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/csv-write-options.rb 
b/ruby/red-arrow/lib/arrow/csv-write-options.rb
new file mode 100644
index 0000000000..ccdc9bd4c9
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/csv-write-options.rb
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class CSVWriteOptions
+    class << self
+      def try_convert(value)
+        case value
+        when Hash
+          options = new
+          value.each do |k, v|
+            options.public_send("#{k}=", v)
+          end
+          options
+        else
+          nil
+        end
+      end
+    end
+    
+    alias_method :delimiter_raw, :delimiter
+    def delimiter
+      delimiter_raw.chr
+    end
+
+    alias_method :delimiter_raw=, :delimiter=
+    def delimiter=(delimiter)
+      case delimiter
+      when String
+        if delimiter.bytesize != 1
+          message = "delimiter must be 1 byte character: #{delimiter.inspect}"
+          raise ArgumentError, message
+        end
+        delimiter = delimiter.ord
+      end
+      self.delimiter_raw = delimiter
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/libraries.rb 
b/ruby/red-arrow/lib/arrow/libraries.rb
index 8135a2d4e7..3bbf621f2b 100644
--- a/ruby/red-arrow/lib/arrow/libraries.rb
+++ b/ruby/red-arrow/lib/arrow/libraries.rb
@@ -39,6 +39,7 @@ require_relative "column"
 require_relative "compression-type"
 require_relative "csv-loader"
 require_relative "csv-read-options"
+require_relative "csv-write-options"
 require_relative "data-type"
 require_relative "date32-array"
 require_relative "date32-array-builder"
diff --git a/ruby/red-arrow/test/test-csv-writer.rb 
b/ruby/red-arrow/test/test-csv-writer.rb
new file mode 100644
index 0000000000..511e47ad4b
--- /dev/null
+++ b/ruby/red-arrow/test/test-csv-writer.rb
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class CSVWriterTest < Test::Unit::TestCase
+  sub_test_case("CSVWriteOptions") do
+    def setup
+      @options = Arrow::CSVWriteOptions.new
+    end
+
+    def test_delimiter
+      assert_equal(",", @options.delimiter)
+      @options.delimiter = ";"
+      assert_equal(";", @options.delimiter)
+    end
+  end
+
+  def test_write_table
+    table = Arrow::Table.new({
+      message: ["Start", nil, "Reboot"],
+      count: [2, 9, 5],
+    })
+
+    buffer = Arrow::ResizableBuffer.new(0)
+    Arrow::BufferOutputStream.open(buffer) do |output|
+      Arrow::CSVWriter.open(output, table.schema, delimiter: ";") do 
|csv_writer|
+        csv_writer.write_table(table)
+      end
+    end
+
+    csv_output = buffer.data.to_s
+    expected = <<~CSV
+      "message";"count"
+      "Start";2
+      ;9
+      "Reboot";5
+    CSV
+    assert_equal(expected, csv_output)
+  end
+end

Reply via email to