This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 262e4e19b2 GH-49117: [Ruby] Add support for writing union arrays (#49118)
262e4e19b2 is described below

commit 262e4e19b2968cc117454ac56dc30d2e76aab107
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Feb 3 20:20:40 2026 +0900

    GH-49117: [Ruby] Add support for writing union arrays (#49118)
    
    ### Rationale for this change
    
    There are dense and sparse variants.
    
    ### What changes are included in this PR?
    
    * Add `garrow_union_array_get_n_fields()`
    * Add `ArrowFormat::UnionArray#children`
    * Add `ArrowFormat::DenseUnionArray#each_buffer`
    * Add `ArrowFormat::SparseUnionArray#each_buffer`
    * Add `ArrowFormat::UnionType#to_flatbuffers`
    * Add `Arrow::UnionArray#fields`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #49117
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/composite-array.cpp              | 15 +++++
 c_glib/arrow-glib/composite-array.h                |  4 ++
 ruby/red-arrow-format/lib/arrow-format/array.rb    | 14 ++++
 ruby/red-arrow-format/lib/arrow-format/type.rb     | 18 ++++-
 ruby/red-arrow-format/test/test-writer.rb          | 76 ++++++++++++++++++++++
 ruby/red-arrow/lib/arrow/dense-union-array.rb      |  2 +-
 ruby/red-arrow/lib/arrow/libraries.rb              |  1 +
 ruby/red-arrow/lib/arrow/sparse-union-array.rb     |  2 +-
 .../{sparse-union-array.rb => union-array.rb}      | 10 +--
 9 files changed, 134 insertions(+), 8 deletions(-)

diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index ef7502dd57..4f31a599f5 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -1461,6 +1461,21 @@ garrow_union_array_get_field(GArrowUnionArray *array, gint i)
   return field;
 }
 
+/**
+ * garrow_union_array_get_n_fields:
+ * @array: A #GArrowUnionArray.
+ *
+ * Returns: The number of fields.
+ *
+ * Since: 24.0.0
+ */
+gint
+garrow_union_array_get_n_fields(GArrowUnionArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  return arrow_array->num_fields();
+}
+
 G_DEFINE_TYPE(GArrowSparseUnionArray, garrow_sparse_union_array, GARROW_TYPE_UNION_ARRAY)
 
 static void
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index 73d8d7f8a6..930bb813ac 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -236,6 +236,10 @@ GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_union_array_get_field(GArrowUnionArray *array, gint i);
 
+GARROW_AVAILABLE_IN_24_0
+gint
+garrow_union_array_get_n_fields(GArrowUnionArray *array);
+
 #define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type())
 GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray,
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 8c0620cdfb..4728d7ca70 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -455,6 +455,7 @@ module ArrowFormat
   end
 
   class UnionArray < Array
+    attr_reader :children
     def initialize(type, size, types_buffer, children)
       super(type, size, nil)
       @types_buffer = types_buffer
@@ -472,6 +473,13 @@ module ArrowFormat
       @offsets_buffer = offsets_buffer
     end
 
+    def each_buffer(&block)
+      return to_enum(__method__) unless block_given?
+
+      yield(@types_buffer)
+      yield(@offsets_buffer)
+    end
+
     def to_a
       children_values = @children.collect(&:to_a)
       types = @types_buffer.each(:S8, 0, @size)
@@ -484,6 +492,12 @@ module ArrowFormat
   end
 
   class SparseUnionArray < UnionArray
+    def each_buffer(&block)
+      return to_enum(__method__) unless block_given?
+
+      yield(@types_buffer)
+    end
+
     def to_a
       children_values = @children.collect(&:to_a)
       @types_buffer.each(:S8, 0, @size).with_index.collect do |(_, type), i|
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index c12d2d6e08..65353f26ab 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -796,8 +796,9 @@ module ArrowFormat
   class UnionType < Type
     attr_reader :children
     attr_reader :type_ids
-    def initialize(children, type_ids)
+    def initialize(mode, children, type_ids)
       super()
+      @mode = mode
       @children = children
       @type_ids = type_ids
       @type_indexes = {}
@@ -806,9 +807,20 @@ module ArrowFormat
     def resolve_type_index(type)
       @type_indexes[type] ||= @type_ids.index(type)
     end
+
+    def to_flatbuffers
+      fb_type = FB::Union::Data.new
+      fb_type.mode = FB::UnionMode.try_convert(@mode.to_s.capitalize)
+      fb_type.type_ids = @type_ids
+      fb_type
+    end
   end
 
   class DenseUnionType < UnionType
+    def initialize(children, type_ids)
+      super(:dense, children, type_ids)
+    end
+
     def name
       "DenseUnion"
     end
@@ -819,6 +831,10 @@ module ArrowFormat
   end
 
   class SparseUnionType < UnionType
+    def initialize(children, type_ids)
+      super(:sparse, children, type_ids)
+    end
+
     def name
       "SparseUnion"
     end
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index c0e4dd4607..7be4c162db 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -94,6 +94,16 @@ module WriterTests
         convert_field(field)
       end
       ArrowFormat::StructType.new(fields)
+    when Arrow::DenseUnionDataType
+      fields = red_arrow_type.fields.collect do |field|
+        convert_field(field)
+      end
+      ArrowFormat::DenseUnionType.new(fields, red_arrow_type.type_codes)
+    when Arrow::SparseUnionDataType
+      fields = red_arrow_type.fields.collect do |field|
+        convert_field(field)
+      end
+      ArrowFormat::SparseUnionType.new(fields, red_arrow_type.type_codes)
     else
       raise "Unsupported type: #{red_arrow_type.inspect}"
     end
@@ -141,6 +151,24 @@ module WriterTests
       type.build_array(red_arrow_array.size,
                        convert_buffer(red_arrow_array.null_bitmap),
                        children)
+    when ArrowFormat::DenseUnionType
+      types_buffer = convert_buffer(red_arrow_array.type_ids.data_buffer)
+      offsets_buffer = convert_buffer(red_arrow_array.value_offsets.data_buffer)
+      children = red_arrow_array.fields.collect do |red_arrow_field|
+        convert_array(red_arrow_field)
+      end
+      type.build_array(red_arrow_array.size,
+                       types_buffer,
+                       offsets_buffer,
+                       children)
+    when ArrowFormat::SparseUnionType
+      types_buffer = convert_buffer(red_arrow_array.type_ids.data_buffer)
+      children = red_arrow_array.fields.collect do |red_arrow_field|
+        convert_array(red_arrow_field)
+      end
+      type.build_array(red_arrow_array.size,
+                       types_buffer,
+                       children)
     else
       raise "Unsupported array #{red_arrow_array.inspect}"
     end
@@ -840,6 +868,54 @@ module WriterTests
                          @values)
           end
         end
+
+        sub_test_case("DenseUnion") do
+          def build_array
+            fields = [
+              Arrow::Field.new("number", :int8),
+              Arrow::Field.new("text", :string),
+            ]
+            type_ids = [11, 13]
+            data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+            types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+            value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+            children = [
+              Arrow::Int8Array.new([1, nil]),
+              Arrow::StringArray.new(["a", "b", "c"])
+            ]
+            Arrow::DenseUnionArray.new(data_type,
+                                       types,
+                                       value_offsets,
+                                       children)
+          end
+
+          def test_write
+            assert_equal([1, "a", nil, "b", "c"],
+                         @values)
+          end
+        end
+
+        sub_test_case("SparseUnion") do
+          def build_array
+            fields = [
+              Arrow::Field.new("number", :int8),
+              Arrow::Field.new("text", :string),
+            ]
+            type_ids = [11, 13]
+            data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
+            types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
+            children = [
+              Arrow::Int8Array.new([1, nil, nil, nil, 5]),
+              Arrow::StringArray.new([nil, "b", nil, "d", nil])
+            ]
+            Arrow::SparseUnionArray.new(data_type, types, children)
+          end
+
+          def test_write
+            assert_equal([1, "b", nil, "d", 5],
+                         @values)
+          end
+        end
       end
     end
   end
diff --git a/ruby/red-arrow/lib/arrow/dense-union-array.rb b/ruby/red-arrow/lib/arrow/dense-union-array.rb
index 07b2bbfce6..eb8bab0fa6 100644
--- a/ruby/red-arrow/lib/arrow/dense-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/dense-union-array.rb
@@ -19,7 +19,7 @@ module Arrow
   class DenseUnionArray
     def get_value(i)
       child_id = get_child_id(i)
-      field = get_field(child_id)
+      field = fields[child_id]
       field[get_value_offset(i)]
     end
   end
diff --git a/ruby/red-arrow/lib/arrow/libraries.rb b/ruby/red-arrow/lib/arrow/libraries.rb
index 52cc1ceb29..a29a5588bb 100644
--- a/ruby/red-arrow/lib/arrow/libraries.rb
+++ b/ruby/red-arrow/lib/arrow/libraries.rb
@@ -134,5 +134,6 @@ require_relative "timestamp-array"
 require_relative "timestamp-array-builder"
 require_relative "timestamp-data-type"
 require_relative "timestamp-parser"
+require_relative "union-array"
 require_relative "union-array-builder"
 require_relative "writable"
diff --git a/ruby/red-arrow/lib/arrow/sparse-union-array.rb b/ruby/red-arrow/lib/arrow/sparse-union-array.rb
index 783493f6b6..084001a058 100644
--- a/ruby/red-arrow/lib/arrow/sparse-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/sparse-union-array.rb
@@ -19,7 +19,7 @@ module Arrow
   class SparseUnionArray
     def get_value(i)
       child_id = get_child_id(i)
-      field = get_field(child_id)
+      field = fields[child_id]
       field[i]
     end
   end
diff --git a/ruby/red-arrow/lib/arrow/sparse-union-array.rb b/ruby/red-arrow/lib/arrow/union-array.rb
similarity index 86%
copy from ruby/red-arrow/lib/arrow/sparse-union-array.rb
copy to ruby/red-arrow/lib/arrow/union-array.rb
index 783493f6b6..a316dd38f1 100644
--- a/ruby/red-arrow/lib/arrow/sparse-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/union-array.rb
@@ -16,11 +16,11 @@
 # under the License.
 
 module Arrow
-  class SparseUnionArray
-    def get_value(i)
-      child_id = get_child_id(i)
-      field = get_field(child_id)
-      field[i]
+  class UnionArray
+    def fields
+      @fields ||= n_fields.times.collect do |i|
+        get_field(i)
+      end
     end
   end
 end

Reply via email to