This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4723b79d81 GH-48388: [Ruby] Add support for reading map array (#48389)
4723b79d81 is described below

commit 4723b79d812daff41f761e8452f7d936ad165548
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Dec 8 10:48:33 2025 +0900

    GH-48388: [Ruby] Add support for reading map array (#48389)
    
    ### Rationale for this change
    
    A map array is a list array whose items are key/value struct entries.
    
    ### What changes are included in this PR?
    
    * Add `ArrowFormat::MapType`
    * Add `ArrowFormat::MapArray`
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #48388
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 ruby/red-arrow-format/lib/arrow-format/array.rb    | 21 +++++++++++-
 ruby/red-arrow-format/lib/arrow-format/error.rb    |  5 ++-
 ruby/red-arrow-format/lib/arrow-format/field.rb    |  7 +++-
 .../lib/arrow-format/file-reader.rb                |  7 ++--
 ruby/red-arrow-format/lib/arrow-format/type.rb     | 39 ++++++++++++++++++++--
 ruby/red-arrow-format/test/test-file-reader.rb     | 25 ++++++++++++++
 6 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index c5b636dc75..ea728ce8ce 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -162,7 +162,7 @@ module ArrowFormat
     end
   end
 
-  class ListArray < Array
+  class VariableSizeListArray < Array
     def initialize(type, size, validity_buffer, offsets_buffer, child)
       super(type, size, validity_buffer)
       @offsets_buffer = offsets_buffer
@@ -181,6 +181,9 @@ module ArrowFormat
     end
   end
 
+  class ListArray < VariableSizeListArray
+  end
+
   class StructArray < Array
     def initialize(type, size, validity_buffer, children)
       super(type, size, validity_buffer)
@@ -197,4 +200,20 @@ module ArrowFormat
       apply_validity(values)
     end
   end
+
+  class MapArray < VariableSizeListArray
+    def to_a
+      super.collect do |entries|
+        if entries.nil?
+          entries
+        else
+          hash = {}
+          entries.each do |key, value|
+            hash[key] = value
+          end
+          hash
+        end
+      end
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/error.rb b/ruby/red-arrow-format/lib/arrow-format/error.rb
index 125c0cbcf8..39b0b8af15 100644
--- a/ruby/red-arrow-format/lib/arrow-format/error.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/error.rb
@@ -18,11 +18,14 @@ module ArrowFormat
   class Error < StandardError
   end
 
-  class ReadError < StandardError
+  class ReadError < Error
     attr_reader :buffer
     def initialize(buffer, message)
       @buffer = buffer
       super("#{message}: #{@buffer}")
     end
   end
+
+  class TypeError < Error
+  end
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb b/ruby/red-arrow-format/lib/arrow-format/field.rb
index a5e146bcbd..ac531750f7 100644
--- a/ruby/red-arrow-format/lib/arrow-format/field.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/field.rb
@@ -18,9 +18,14 @@ module ArrowFormat
   class Field
     attr_reader :name
     attr_reader :type
-    def initialize(name, type)
+    def initialize(name, type, nullable)
       @name = name
       @type = type
+      @nullable = nullable
+    end
+
+    def nullable?
+      @nullable
     end
   end
 end
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 79ea86cd33..3b2dc22823 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -29,6 +29,7 @@ require_relative "org/apache/arrow/flatbuf/footer"
 require_relative "org/apache/arrow/flatbuf/int"
 require_relative "org/apache/arrow/flatbuf/large_binary"
 require_relative "org/apache/arrow/flatbuf/list"
+require_relative "org/apache/arrow/flatbuf/map"
 require_relative "org/apache/arrow/flatbuf/message"
 require_relative "org/apache/arrow/flatbuf/null"
 require_relative "org/apache/arrow/flatbuf/precision"
@@ -163,6 +164,8 @@ module ArrowFormat
       when Org::Apache::Arrow::Flatbuf::Struct
         children = fb_field.children.collect {|child| read_field(child)}
         type = StructType.new(children)
+      when Org::Apache::Arrow::Flatbuf::Map
+        type = MapType.new(read_field(fb_field.children[0]))
       when Org::Apache::Arrow::Flatbuf::Binary
         type = BinaryType.singleton
       when Org::Apache::Arrow::Flatbuf::LargeBinary
@@ -170,7 +173,7 @@ module ArrowFormat
       when Org::Apache::Arrow::Flatbuf::Utf8
         type = UTF8Type.singleton
       end
-      Field.new(fb_field.name, type)
+      Field.new(fb_field.name, type, fb_field.nullable?)
     end
 
     def read_schema(fb_schema)
@@ -199,7 +202,7 @@ module ArrowFormat
         values_buffer = buffers.shift
         values = body.slice(values_buffer.offset, values_buffer.length)
         field.type.build_array(length, validity, values)
-      when ListType
+      when VariableSizeListType
         offsets_buffer = buffers.shift
         offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
         child = read_column(field.type.child, nodes, buffers, body)
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 179a89ff4c..7726c23325 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -195,13 +195,20 @@ module ArrowFormat
     end
   end
 
-  class ListType < Type
+  class VariableSizeListType < Type
     attr_reader :child
-    def initialize(child)
-      super("List")
+    def initialize(name, child)
+      super(name)
       @child = child
     end
 
+  end
+
+  class ListType < VariableSizeListType
+    def initialize(child)
+      super("List", child)
+    end
+
     def build_array(size, validity_buffer, offsets_buffer, child)
       ListArray.new(self, size, validity_buffer, offsets_buffer, child)
     end
@@ -218,4 +225,30 @@ module ArrowFormat
       StructArray.new(self, size, validity_buffer, children)
     end
   end
+
+  class MapType < VariableSizeListType
+    def initialize(child)
+      if child.nullable?
+        raise TypeError.new("Map entry field must not be nullable: " +
+                            child.inspect)
+      end
+      type = child.type
+      unless type.is_a?(StructType)
+        raise TypeError.new("Map entry type must be struct: #{type.inspect}")
+      end
+      unless type.children.size == 2
+        raise TypeError.new("Map entry struct type must have 2 children: " +
+                            type.inspect)
+      end
+      if type.children[0].nullable?
+        raise TypeError.new("Map key field must not be nullable: " +
+                            type.children[0].inspect)
+      end
+      super("Map", child)
+    end
+
+    def build_array(size, validity_buffer, offsets_buffer, child)
+      MapArray.new(self, size, validity_buffer, offsets_buffer, child)
+    end
+  end
 end
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index 48c20d1f3f..9748ede47d 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -160,4 +160,29 @@ class TestFileReader < Test::Unit::TestCase
                    read)
     end
   end
+
+  sub_test_case("Map") do
+    def build_array
+      data_type = Arrow::MapDataType.new(:string, :int8)
+      Arrow::MapArray.new(data_type,
+                          [
+                            {"a" => -128, "b" => 127},
+                            nil,
+                            {"c" => nil},
+                          ])
+    end
+
+    def test_read
+      assert_equal([
+                     {
+                       "value" => [
+                         {"a" => -128, "b" => 127},
+                         nil,
+                         {"c" => nil},
+                       ],
+                     },
+                   ],
+                   read)
+    end
+  end
 end

Reply via email to