This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7b54614f57 GH-48347: [Ruby] Add support for reading list array (#48351)
7b54614f57 is described below

commit 7b54614f5758e17e93d4815e65657c14fccb2868
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Dec 5 20:43:38 2025 +0900

    GH-48347: [Ruby] Add support for reading list array (#48351)
    
    ### Rationale for this change
    
    The list type is the first nested type that the red-arrow-format reader supports.
    
    ### What changes are included in this PR?
    
    * Add `ArrowFormat::ListType`
    * Add `ArrowFormat::ListArray`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #48347
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow-format/lib/arrow-format/array.rb    | 19 ++++++
 .../lib/arrow-format/file-reader.rb                | 74 +++++++++++++---------
 ruby/red-arrow-format/lib/arrow-format/type.rb     | 13 +++-
 ruby/red-arrow-format/test/test-file-reader.rb     | 12 ++++
 4 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 6d164cc0b5..2a304f5416 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -123,4 +123,23 @@ module ArrowFormat
       Encoding::UTF_8
     end
   end
+
+  class ListArray < Array
+    def initialize(type, size, validity_buffer, offsets_buffer, child)
+      super(type, size, validity_buffer)
+      @offsets_buffer = offsets_buffer
+      @child = child
+    end
+
+    def to_a
+      child_values = @child.to_a
+      values = @offsets_buffer.
+        each(:s32, 0, @size + 1). # TODO: big endian support
+        each_cons(2).
+        collect do |(_, offset), (_, next_offset)|
+        child_values[offset...next_offset]
+      end
+      apply_validity(values)
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 733140a10b..3db6bad77a 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -22,14 +22,15 @@ require_relative "record-batch"
 require_relative "schema"
 require_relative "type"
 
+require_relative "org/apache/arrow/flatbuf/binary"
 require_relative "org/apache/arrow/flatbuf/bool"
 require_relative "org/apache/arrow/flatbuf/footer"
-require_relative "org/apache/arrow/flatbuf/message"
-require_relative "org/apache/arrow/flatbuf/binary"
 require_relative "org/apache/arrow/flatbuf/int"
+require_relative "org/apache/arrow/flatbuf/list"
+require_relative "org/apache/arrow/flatbuf/message"
 require_relative "org/apache/arrow/flatbuf/null"
-require_relative "org/apache/arrow/flatbuf/utf8"
 require_relative "org/apache/arrow/flatbuf/schema"
+require_relative "org/apache/arrow/flatbuf/utf8"
 
 module ArrowFormat
   class FileReader
@@ -90,9 +91,10 @@ module ArrowFormat
         when Org::Apache::Arrow::Flatbuf::RecordBatch
           n_rows = header.length
           columns = []
+          nodes = header.nodes
           buffers = header.buffers
           schema.fields.each do |field|
-            columns << read_column(field, n_rows, buffers, body)
+            columns << read_column(field, nodes, buffers, body)
           end
           yield(RecordBatch.new(schema, n_rows, columns))
         end
@@ -129,35 +131,44 @@ module ArrowFormat
       Org::Apache::Arrow::Flatbuf::Footer.new(footer_data)
     end
 
-    def read_schema(fb_schema)
-      fields = fb_schema.fields.collect do |fb_field|
-        fb_type = fb_field.type
-        case fb_type
-        when Org::Apache::Arrow::Flatbuf::Null
-          type = NullType.singleton
-        when Org::Apache::Arrow::Flatbuf::Bool
-          type = BooleanType.singleton
-        when Org::Apache::Arrow::Flatbuf::Int
-          case fb_type.bit_width
-          when 8
-            if fb_type.signed?
-              type = Int8Type.singleton
-            else
-              type = UInt8Type.singleton
-            end
+    def read_field(fb_field)
+      fb_type = fb_field.type
+      case fb_type
+      when Org::Apache::Arrow::Flatbuf::Null
+        type = NullType.singleton
+      when Org::Apache::Arrow::Flatbuf::Bool
+        type = BooleanType.singleton
+      when Org::Apache::Arrow::Flatbuf::Int
+        case fb_type.bit_width
+        when 8
+          if fb_type.signed?
+            type = Int8Type.singleton
+          else
+            type = UInt8Type.singleton
           end
-        when Org::Apache::Arrow::Flatbuf::Binary
-          type = BinaryType.singleton
-        when Org::Apache::Arrow::Flatbuf::Utf8
-          type = UTF8Type.singleton
         end
-        Field.new(fb_field.name, type)
+      when Org::Apache::Arrow::Flatbuf::List
+        type = ListType.new(read_field(fb_field.children[0]))
+      when Org::Apache::Arrow::Flatbuf::Binary
+        type = BinaryType.singleton
+      when Org::Apache::Arrow::Flatbuf::Utf8
+        type = UTF8Type.singleton
+      end
+      Field.new(fb_field.name, type)
+    end
+
+    def read_schema(fb_schema)
+      fields = fb_schema.fields.collect do |fb_field|
+        read_field(fb_field)
       end
       Schema.new(fields)
     end
 
-    def read_column(field, n_rows, buffers, body)
-      return field.type.build_array(n_rows) if field.type.is_a?(NullType)
+    def read_column(field, nodes, buffers, body)
+      node = nodes.shift
+      length = node.length
+
+      return field.type.build_array(length) if field.type.is_a?(NullType)
 
       validity_buffer = buffers.shift
       if validity_buffer.length.zero?
@@ -172,14 +183,19 @@ module ArrowFormat
            UInt8Type
         values_buffer = buffers.shift
         values = body.slice(values_buffer.offset, values_buffer.length)
-        field.type.build_array(n_rows, validity, values)
+        field.type.build_array(length, validity, values)
+      when ListType
+        offsets_buffer = buffers.shift
+        offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
+        child = read_column(field.type.child, nodes, buffers, body)
+        field.type.build_array(length, validity, offsets, child)
       when BinaryType,
            UTF8Type
         offsets_buffer = buffers.shift
         values_buffer = buffers.shift
         offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
         values = body.slice(values_buffer.offset, values_buffer.length)
-        field.type.build_array(n_rows, validity, offsets, values)
+        field.type.build_array(length, validity, offsets, values)
       end
     end
   end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 22a246aeab..c792eac175 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -103,7 +103,6 @@ module ArrowFormat
       end
     end
 
-    attr_reader :name
     def initialize
       super("Binary")
     end
@@ -129,4 +128,16 @@ module ArrowFormat
       UTF8Array.new(self, size, validity_buffer, offsets_buffer, values_buffer)
     end
   end
+
+  class ListType < Type
+    attr_reader :child
+    def initialize(child)
+      super("List")
+      @child = child
+    end
+
+    def build_array(size, validity_buffer, offsets_buffer, child)
+      ListArray.new(self, size, validity_buffer, offsets_buffer, child)
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index 0029a57887..95cb6f3b1a 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -105,4 +105,16 @@ class TestFileReader < Test::Unit::TestCase
                    read)
     end
   end
+
+  sub_test_case("List") do
+    def build_array
+      data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+      Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
+    end
+
+    def test_read
+      assert_equal([{"value" => [[-128, 127], nil, [-1, 0, 1]]}],
+                   read)
+    end
+  end
 end

Reply via email to