This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 384ea25e7a GH-49071: [Ruby] Add support for writing list and large list arrays (#49072)
384ea25e7a is described below

commit 384ea25e7a4583da170dfb65d29702a6c8ad14f4
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sat Jan 31 10:37:33 2026 +0900

    GH-49071: [Ruby] Add support for writing list and large list arrays (#49072)
    
    ### Rationale for this change
    
    List and large list arrays use different offset sizes (32-bit and 64-bit, respectively).
    
    ### What changes are included in this PR?
    
    * Add `ArrowFormat::ListType#to_flatbuffers`
    * Add `ArrowFormat::LargeListType#to_flatbuffers`
    * Add `ArrowFormat::VariableSizeListArray#child`
    * Add `ArrowFormat::VariableSizeListArray#each_buffer`
    * `garrow_array_get_null_bitmap()` returns `NULL` when the null bitmap doesn't exist
    * Add `garrow_list_array_get_value_offsets_buffer()`
    * Add `garrow_large_list_array_get_value_offsets_buffer()`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #49071
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/basic-array.cpp                  |  6 ++-
 c_glib/arrow-glib/composite-array.cpp              | 46 ++++++++++++++++++++++
 c_glib/arrow-glib/composite-array.h                |  8 ++++
 ruby/red-arrow-format/lib/arrow-format/array.rb    |  8 ++++
 ruby/red-arrow-format/lib/arrow-format/field.rb    |  4 +-
 .../lib/arrow-format/record-batch.rb               |  4 +-
 ruby/red-arrow-format/lib/arrow-format/type.rb     |  9 ++++-
 ruby/red-arrow-format/test/test-writer.rb          | 42 ++++++++++++++++++++
 8 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/c_glib/arrow-glib/basic-array.cpp 
b/c_glib/arrow-glib/basic-array.cpp
index cf6e94738e..bf5bf60d00 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -1114,7 +1114,11 @@ garrow_array_get_null_bitmap(GArrowArray *array)
 
   auto arrow_array = garrow_array_get_raw(array);
   auto arrow_null_bitmap = arrow_array->null_bitmap();
-  return garrow_buffer_new_raw(&arrow_null_bitmap);
+  if (arrow_null_bitmap) {
+    return garrow_buffer_new_raw(&arrow_null_bitmap);
+  } else {
+    return nullptr;
+  }
 }
 
 /**
diff --git a/c_glib/arrow-glib/composite-array.cpp 
b/c_glib/arrow-glib/composite-array.cpp
index 9bc53264b7..ef7502dd57 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -188,6 +188,22 @@ garrow_base_list_array_get_value_offsets(GArrowArray 
*array, gint64 *n_offsets)
   return arrow_list_array->raw_value_offsets();
 };
 
+template <typename LIST_ARRAY_CLASS>
+GArrowBuffer *
+garrow_base_list_array_get_value_offsets_buffer(GArrowArray *array)
+{
+  GArrowBuffer *buffer = nullptr;
+  g_object_get(array, "buffer1", &buffer, nullptr);
+  if (buffer) {
+    return buffer;
+  }
+
+  auto arrow_array = garrow_array_get_raw(array);
+  auto arrow_list_array = 
std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array);
+  auto arrow_buffer = arrow_list_array->value_offsets();
+  return garrow_buffer_new_raw(&arrow_buffer);
+};
+
 G_BEGIN_DECLS
 
 static void
@@ -385,6 +401,21 @@ garrow_list_array_get_value_offsets(GArrowListArray 
*array, gint64 *n_offsets)
                                                                     n_offsets);
 }
 
+/**
+ * garrow_list_array_get_value_offsets_buffer:
+ * @array: A #GArrowListArray.
+ *
+ * Returns: (transfer full) (nullable): The value offsets buffer.
+ *
+ * Since: 24.0.0
+ */
+GArrowBuffer *
+garrow_list_array_get_value_offsets_buffer(GArrowListArray *array)
+{
+  return garrow_base_list_array_get_value_offsets_buffer<arrow::ListArray>(
+    GARROW_ARRAY(array));
+}
+
 typedef struct GArrowLargeListArrayPrivate_
 {
   GArrowArray *raw_values;
@@ -602,6 +633,21 @@ 
garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n
   return reinterpret_cast<const gint64 *>(value_offsets);
 }
 
+/**
+ * garrow_large_list_array_get_value_offsets_buffer:
+ * @array: A #GArrowLargeListArray.
+ *
+ * Returns: (transfer full) (nullable): The value offsets buffer.
+ *
+ * Since: 24.0.0
+ */
+GArrowBuffer *
+garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array)
+{
+  return 
garrow_base_list_array_get_value_offsets_buffer<arrow::LargeListArray>(
+    GARROW_ARRAY(array));
+}
+
 typedef struct GArrowFixedSizeListArrayPrivate_
 {
   GArrowArray *raw_values;
diff --git a/c_glib/arrow-glib/composite-array.h 
b/c_glib/arrow-glib/composite-array.h
index 117ffdf707..73d8d7f8a6 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -68,6 +68,10 @@ GARROW_AVAILABLE_IN_2_0
 const gint32 *
 garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets);
 
+GARROW_AVAILABLE_IN_24_0
+GArrowBuffer *
+garrow_list_array_get_value_offsets_buffer(GArrowListArray *array);
+
 #define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type())
 GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(
@@ -110,6 +114,10 @@ GARROW_AVAILABLE_IN_2_0
 const gint64 *
 garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 
*n_offsets);
 
+GARROW_AVAILABLE_IN_24_0
+GArrowBuffer *
+garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array);
+
 #define GARROW_TYPE_FIXED_SIZE_LIST_ARRAY 
(garrow_fixed_size_list_array_get_type())
 GARROW_AVAILABLE_IN_23_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeListArray,
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb 
b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 825311f43d..df1356c614 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -370,12 +370,20 @@ module ArrowFormat
   end
 
   class VariableSizeListArray < Array
+    attr_reader :child
     def initialize(type, size, validity_buffer, offsets_buffer, child)
       super(type, size, validity_buffer)
       @offsets_buffer = offsets_buffer
       @child = child
     end
 
+    def each_buffer(&block)
+      return to_enum(__method__) unless block_given?
+
+      yield(@validity_buffer)
+      yield(@offsets_buffer)
+    end
+
     def to_a
       child_values = @child.to_a
       values = @offsets_buffer.
diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb 
b/ruby/red-arrow-format/lib/arrow-format/field.rb
index fc5639bb66..3642c867c8 100644
--- a/ruby/red-arrow-format/lib/arrow-format/field.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/field.rb
@@ -49,7 +49,9 @@ module ArrowFormat
       else
         fb_field.type = @type.to_flatbuffers
       end
-      if @type.respond_to?(:children)
+      if @type.respond_to?(:child)
+        fb_field.children = [@type.child.to_flatbuffers]
+      elsif @type.respond_to?(:children)
         fb_field.children = @type.children.collect(&:to_flatbuffers)
       end
       # fb_field.custom_metadata = @custom_metadata
diff --git a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb 
b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
index cf925eebdf..a641c87da7 100644
--- a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
@@ -70,7 +70,9 @@ module ArrowFormat
       Enumerator.new do |yielder|
         traverse = lambda do |array|
           yielder << array
-          if array.respond_to?(:children)
+          if array.respond_to?(:child)
+            traverse.call(array.child)
+          elsif array.respond_to?(:children)
             array.children.each do |child_array|
               traverse.call(child_array)
             end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb 
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 9ba8cae710..50c392f270 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -707,7 +707,6 @@ module ArrowFormat
       super()
       @child = child
     end
-
   end
 
   class ListType < VariableSizeListType
@@ -718,6 +717,10 @@ module ArrowFormat
     def build_array(size, validity_buffer, offsets_buffer, child)
       ListArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
+
+    def to_flatbuffers
+      FB::List::Data.new
+    end
   end
 
   class LargeListType < VariableSizeListType
@@ -728,6 +731,10 @@ module ArrowFormat
     def build_array(size, validity_buffer, offsets_buffer, child)
       LargeListArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
+
+    def to_flatbuffers
+      FB::LargeList::Data.new
+    end
   end
 
   class StructType < Type
diff --git a/ruby/red-arrow-format/test/test-writer.rb 
b/ruby/red-arrow-format/test/test-writer.rb
index 841194ff51..bf05f20e4e 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -83,11 +83,22 @@ module WriterTests
                                       red_arrow_type.scale)
     when Arrow::FixedSizeBinaryDataType
       ArrowFormat::FixedSizeBinaryType.new(red_arrow_type.byte_width)
+    when Arrow::ListDataType
+      ArrowFormat::ListType.new(convert_field(red_arrow_type.field))
+    when Arrow::LargeListDataType
+      ArrowFormat::LargeListType.new(convert_field(red_arrow_type.field))
     else
       raise "Unsupported type: #{red_arrow_type.inspect}"
     end
   end
 
+  def convert_field(red_arrow_field)
+    ArrowFormat::Field.new(red_arrow_field.name,
+                           convert_type(red_arrow_field.data_type),
+                           red_arrow_field.nullable?,
+                           nil)
+  end
+
   def convert_buffer(buffer)
     return nil if buffer.nil?
     IO::Buffer.for(buffer.data.to_s)
@@ -111,6 +122,11 @@ module WriterTests
       type.build_array(red_arrow_array.size,
                        convert_buffer(red_arrow_array.null_bitmap),
                        convert_buffer(red_arrow_array.data_buffer))
+    when ArrowFormat::VariableSizeListType
+      type.build_array(red_arrow_array.size,
+                       convert_buffer(red_arrow_array.null_bitmap),
+                       convert_buffer(red_arrow_array.value_offsets_buffer),
+                       convert_array(red_arrow_array.values_raw))
     else
       raise "Unsupported array #{red_arrow_array.inspect}"
     end
@@ -706,6 +722,32 @@ module WriterTests
                          @values)
           end
         end
+
+        sub_test_case("List") do
+          def build_array
+            data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+            Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
+          end
+
+          def test_write
+            assert_equal([[-128, 127], nil, [-1, 0, 1]],
+                         @values)
+          end
+        end
+
+        sub_test_case("LargeList") do
+          def build_array
+            data_type = Arrow::LargeListDataType.new(name: "count",
+                                                     type: :int8)
+            Arrow::LargeListArray.new(data_type,
+                                      [[-128, 127], nil, [-1, 0, 1]])
+          end
+
+          def test_write
+            assert_equal([[-128, 127], nil, [-1, 0, 1]],
+                         @values)
+          end
+        end
       end
     end
   end

Reply via email to