This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 262e4e19b2 GH-49117: [Ruby] Add support for writing union arrays (#49118)
262e4e19b2 is described below
commit 262e4e19b2968cc117454ac56dc30d2e76aab107
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Feb 3 20:20:40 2026 +0900
GH-49117: [Ruby] Add support for writing union arrays (#49118)
### Rationale for this change
Union arrays come in dense and sparse variants, and the writer needs to support both.
### What changes are included in this PR?
* Add `garrow_union_array_get_n_fields()`
* Add `ArrowFormat::UnionArray#children`
* Add `ArrowFormat::DenseUnionArray#each_buffer`
* Add `ArrowFormat::SparseUnionArray#each_buffer`
* Add `ArrowFormat::UnionType#to_flatbuffers`
* Add `Arrow::UnionArray#fields`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #49117
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/composite-array.cpp | 15 +++++
c_glib/arrow-glib/composite-array.h | 4 ++
ruby/red-arrow-format/lib/arrow-format/array.rb | 14 ++++
ruby/red-arrow-format/lib/arrow-format/type.rb | 18 ++++-
ruby/red-arrow-format/test/test-writer.rb | 76 ++++++++++++++++++++++
ruby/red-arrow/lib/arrow/dense-union-array.rb | 2 +-
ruby/red-arrow/lib/arrow/libraries.rb | 1 +
ruby/red-arrow/lib/arrow/sparse-union-array.rb | 2 +-
.../{sparse-union-array.rb => union-array.rb} | 10 +--
9 files changed, 134 insertions(+), 8 deletions(-)
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index ef7502dd57..4f31a599f5 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -1461,6 +1461,21 @@ garrow_union_array_get_field(GArrowUnionArray *array, gint i)
return field;
}
+/**
+ * garrow_union_array_get_n_fields
+ * @array: A #GArrowUnionArray.
+ *
+ * Returns: The number of fields.
+ *
+ * Since: 24.0.0
+ */
+gint
+garrow_union_array_get_n_fields(GArrowUnionArray *array)
+{
+ auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+ return arrow_array->num_fields();
+}
+
G_DEFINE_TYPE(GArrowSparseUnionArray, garrow_sparse_union_array,
GARROW_TYPE_UNION_ARRAY)
static void
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index 73d8d7f8a6..930bb813ac 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -236,6 +236,10 @@ GARROW_AVAILABLE_IN_ALL
GArrowArray *
garrow_union_array_get_field(GArrowUnionArray *array, gint i);
+GARROW_AVAILABLE_IN_24_0
+gint
+garrow_union_array_get_n_fields(GArrowUnionArray *array);
+
#define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type())
GARROW_AVAILABLE_IN_ALL
G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray,
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 8c0620cdfb..4728d7ca70 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -455,6 +455,7 @@ module ArrowFormat
end
class UnionArray < Array
+ attr_reader :children
def initialize(type, size, types_buffer, children)
super(type, size, nil)
@types_buffer = types_buffer
@@ -472,6 +473,13 @@ module ArrowFormat
@offsets_buffer = offsets_buffer
end
+ def each_buffer(&block)
+ return to_enum(__method__) unless block_given?
+
+ yield(@types_buffer)
+ yield(@offsets_buffer)
+ end
+
def to_a
children_values = @children.collect(&:to_a)
types = @types_buffer.each(:S8, 0, @size)
@@ -484,6 +492,12 @@ module ArrowFormat
end
class SparseUnionArray < UnionArray
+ def each_buffer(&block)
+ return to_enum(__method__) unless block_given?
+
+ yield(@types_buffer)
+ end
+
def to_a
children_values = @children.collect(&:to_a)
@types_buffer.each(:S8, 0, @size).with_index.collect do |(_, type), i|
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index c12d2d6e08..65353f26ab 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -796,8 +796,9 @@ module ArrowFormat
class UnionType < Type
attr_reader :children
attr_reader :type_ids
- def initialize(children, type_ids)
+ def initialize(mode, children, type_ids)
super()
+ @mode = mode
@children = children
@type_ids = type_ids
@type_indexes = {}
@@ -806,9 +807,20 @@ module ArrowFormat
def resolve_type_index(type)
@type_indexes[type] ||= @type_ids.index(type)
end
+
+ def to_flatbuffers
+ fb_type = FB::Union::Data.new
+ fb_type.mode = FB::UnionMode.try_convert(@mode.to_s.capitalize)
+ fb_type.type_ids = @type_ids
+ fb_type
+ end
end
class DenseUnionType < UnionType
+ def initialize(children, type_ids)
+ super(:dense, children, type_ids)
+ end
+
def name
"DenseUnion"
end
@@ -819,6 +831,10 @@ module ArrowFormat
end
class SparseUnionType < UnionType
+ def initialize(children, type_ids)
+ super(:sparse, children, type_ids)
+ end
+
def name
"SparseUnion"
end
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index c0e4dd4607..7be4c162db 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -94,6 +94,16 @@ module WriterTests
convert_field(field)
end
ArrowFormat::StructType.new(fields)
+ when Arrow::DenseUnionDataType
+ fields = red_arrow_type.fields.collect do |field|
+ convert_field(field)
+ end
+ ArrowFormat::DenseUnionType.new(fields, red_arrow_type.type_codes)
+ when Arrow::SparseUnionDataType
+ fields = red_arrow_type.fields.collect do |field|
+ convert_field(field)
+ end
+ ArrowFormat::SparseUnionType.new(fields, red_arrow_type.type_codes)
else
raise "Unsupported type: #{red_arrow_type.inspect}"
end
@@ -141,6 +151,24 @@ module WriterTests
type.build_array(red_arrow_array.size,
convert_buffer(red_arrow_array.null_bitmap),
children)
+ when ArrowFormat::DenseUnionType
+ types_buffer = convert_buffer(red_arrow_array.type_ids.data_buffer)
+ offsets_buffer = convert_buffer(red_arrow_array.value_offsets.data_buffer)
+ children = red_arrow_array.fields.collect do |red_arrow_field|
+ convert_array(red_arrow_field)
+ end
+ type.build_array(red_arrow_array.size,
+ types_buffer,
+ offsets_buffer,
+ children)
+ when ArrowFormat::SparseUnionType
+ types_buffer = convert_buffer(red_arrow_array.type_ids.data_buffer)
+ children = red_arrow_array.fields.collect do |red_arrow_field|
+ convert_array(red_arrow_field)
+ end
+ type.build_array(red_arrow_array.size,
+ types_buffer,
+ children)
else
raise "Unsupported array #{red_arrow_array.inspect}"
end
@@ -840,6 +868,54 @@ module WriterTests
@values)
end
end
+
+ sub_test_case("DenseUnion") do
+ def build_array
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::DenseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 13])
+ value_offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
+ children = [
+ Arrow::Int8Array.new([1, nil]),
+ Arrow::StringArray.new(["a", "b", "c"])
+ ]
+ Arrow::DenseUnionArray.new(data_type,
+ types,
+ value_offsets,
+ children)
+ end
+
+ def test_write
+ assert_equal([1, "a", nil, "b", "c"],
+ @values)
+ end
+ end
+
+ sub_test_case("SparseUnion") do
+ def build_array
+ fields = [
+ Arrow::Field.new("number", :int8),
+ Arrow::Field.new("text", :string),
+ ]
+ type_ids = [11, 13]
+ data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
+ types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
+ children = [
+ Arrow::Int8Array.new([1, nil, nil, nil, 5]),
+ Arrow::StringArray.new([nil, "b", nil, "d", nil])
+ ]
+ Arrow::SparseUnionArray.new(data_type, types, children)
+ end
+
+ def test_write
+ assert_equal([1, "b", nil, "d", 5],
+ @values)
+ end
+ end
end
end
end
diff --git a/ruby/red-arrow/lib/arrow/dense-union-array.rb b/ruby/red-arrow/lib/arrow/dense-union-array.rb
index 07b2bbfce6..eb8bab0fa6 100644
--- a/ruby/red-arrow/lib/arrow/dense-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/dense-union-array.rb
@@ -19,7 +19,7 @@ module Arrow
class DenseUnionArray
def get_value(i)
child_id = get_child_id(i)
- field = get_field(child_id)
+ field = fields[child_id]
field[get_value_offset(i)]
end
end
diff --git a/ruby/red-arrow/lib/arrow/libraries.rb b/ruby/red-arrow/lib/arrow/libraries.rb
index 52cc1ceb29..a29a5588bb 100644
--- a/ruby/red-arrow/lib/arrow/libraries.rb
+++ b/ruby/red-arrow/lib/arrow/libraries.rb
@@ -134,5 +134,6 @@ require_relative "timestamp-array"
require_relative "timestamp-array-builder"
require_relative "timestamp-data-type"
require_relative "timestamp-parser"
+require_relative "union-array"
require_relative "union-array-builder"
require_relative "writable"
diff --git a/ruby/red-arrow/lib/arrow/sparse-union-array.rb b/ruby/red-arrow/lib/arrow/sparse-union-array.rb
index 783493f6b6..084001a058 100644
--- a/ruby/red-arrow/lib/arrow/sparse-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/sparse-union-array.rb
@@ -19,7 +19,7 @@ module Arrow
class SparseUnionArray
def get_value(i)
child_id = get_child_id(i)
- field = get_field(child_id)
+ field = fields[child_id]
field[i]
end
end
diff --git a/ruby/red-arrow/lib/arrow/sparse-union-array.rb b/ruby/red-arrow/lib/arrow/union-array.rb
similarity index 86%
copy from ruby/red-arrow/lib/arrow/sparse-union-array.rb
copy to ruby/red-arrow/lib/arrow/union-array.rb
index 783493f6b6..a316dd38f1 100644
--- a/ruby/red-arrow/lib/arrow/sparse-union-array.rb
+++ b/ruby/red-arrow/lib/arrow/union-array.rb
@@ -16,11 +16,11 @@
# under the License.
module Arrow
- class SparseUnionArray
- def get_value(i)
- child_id = get_child_id(i)
- field = get_field(child_id)
- field[i]
+ class UnionArray
+ def fields
+ @fields ||= n_fields.times.collect do |i|
+ get_field(i)
+ end
end
end
end