This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8974ddc5a5 GH-48425: [Ruby] Add support for reading dense union array (#48426)
8974ddc5a5 is described below

commit 8974ddc5a51c25c8f47054fd10c689d38320845a
Author: Sutou Kouhei <[email protected]>
AuthorDate: Thu Dec 11 07:06:53 2025 +0900

    GH-48425: [Ruby] Add support for reading dense union array (#48426)
    
    ### Rationale for this change
    
    It's a dense variant of union array.
    
    ### What changes are included in this PR?
    
    * Add `ArrowFormat::DenseUnionType`
    * Add `ArrowFormat::DenseUnionArray`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #48425
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow-format/lib/arrow-format/array.rb    | 25 +++++++++++++++++++++
 .../lib/arrow-format/file-reader.rb                | 18 +++++++++++++++
 ruby/red-arrow-format/lib/arrow-format/type.rb     | 25 +++++++++++++++++++++
 ruby/red-arrow-format/test/test-file-reader.rb     | 26 ++++++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index fac39c609a..c4220a4367 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -231,6 +231,31 @@ module ArrowFormat
     end
   end
 
+  class UnionArray < Array
+    def initialize(type,
+                   size,
+                   types_buffer,
+                   offsets_buffer,
+                   children)
+      super(type, size, nil)
+      @types_buffer = types_buffer
+      @offsets_buffer = offsets_buffer
+      @children = children
+    end
+  end
+
+  class DenseUnionArray < UnionArray
+    def to_a
+      children_values = @children.collect(&:to_a)
+      types = @types_buffer.each(:S8, 0, @size)
+      offsets = @offsets_buffer.each(:s32, 0, @size)
+      types.zip(offsets).collect do |(_, type), (_, offset)|
+        index = @type.resolve_type_index(type)
+        children_values[index][offset]
+      end
+    end
+  end
+
   class MapArray < VariableSizeListArray
     def to_a
       super.collect do |entries|
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 4a46382685..68b3c4b64c 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -38,6 +38,8 @@ require_relative "org/apache/arrow/flatbuf/null"
 require_relative "org/apache/arrow/flatbuf/precision"
 require_relative "org/apache/arrow/flatbuf/schema"
 require_relative "org/apache/arrow/flatbuf/struct_"
+require_relative "org/apache/arrow/flatbuf/union"
+require_relative "org/apache/arrow/flatbuf/union_mode"
 require_relative "org/apache/arrow/flatbuf/utf8"
 
 module ArrowFormat
@@ -176,6 +178,13 @@ module ArrowFormat
       when Org::Apache::Arrow::Flatbuf::Struct
         children = fb_field.children.collect {|child| read_field(child)}
         type = StructType.new(children)
+      when Org::Apache::Arrow::Flatbuf::Union
+        children = fb_field.children.collect {|child| read_field(child)}
+        type_ids = fb_type.type_ids
+        case fb_type.mode
+        when Org::Apache::Arrow::Flatbuf::UnionMode::DENSE
+          type = DenseUnionType.new(children, type_ids)
+        end
       when Org::Apache::Arrow::Flatbuf::Map
         type = MapType.new(read_field(fb_field.children[0]))
       when Org::Apache::Arrow::Flatbuf::Binary
@@ -225,6 +234,15 @@ module ArrowFormat
           read_column(child, nodes, buffers, body)
         end
         field.type.build_array(length, validity, children)
+      when UnionType
+        # union type doesn't have validity.
+        types = validity
+        offsets_buffer = buffers.shift
+        offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
+        children = field.type.children.collect do |child|
+          read_column(child, nodes, buffers, body)
+        end
+        field.type.build_array(length, types, offsets, children)
       when VariableSizeBinaryType
         offsets_buffer = buffers.shift
         values_buffer = buffers.shift
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 5516a5807f..c783d87987 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -296,4 +296,29 @@ module ArrowFormat
       MapArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
   end
+
+  class UnionType < Type
+    attr_reader :children
+    attr_reader :type_ids
+    def initialize(name, children, type_ids)
+      super(name)
+      @children = children
+      @type_ids = type_ids
+      @type_indexes = {}
+    end
+
+    def resolve_type_index(type)
+      @type_indexes[type] ||= @type_ids.index(type)
+    end
+  end
+
+  class DenseUnionType < UnionType
+    def initialize(children, type_ids)
+      super("DenseUnion", children, type_ids)
+    end
+
+    def build_array(size, types_buffer, offsets_buffer, children)
+      DenseUnionArray.new(self, size, types_buffer, offsets_buffer, children)
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index 72b818314b..2b37a888f7 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -230,6 +230,32 @@ class TestFileReader < Test::Unit::TestCase
     end
   end
 
+  sub_test_case("DenseUnion") do
+    def build_array
+      fields = [
+        Arrow::Field.new("number", :int8),
+        Arrow::Field.new("text", :string),
+      ]
+      type_ids = [11, 13]
+      data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+      types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+      value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+      children = [
+        Arrow::Int8Array.new([1, nil]),
+        Arrow::StringArray.new(["a", "b", "c"])
+      ]
+      Arrow::DenseUnionArray.new(data_type,
+                                 types,
+                                 value_offsets,
+                                 children)
+    end
+
+    def test_read
+      assert_equal([{"value" => [1, "a", nil, "b", "c"]}],
+                   read)
+    end
+  end
+
   sub_test_case("Map") do
     def build_array
       data_type = Arrow::MapDataType.new(:string, :int8)

Reply via email to