This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f658199c32 GH-48642: [Ruby] Add support for reading decimal128 array
(#48643)
f658199c32 is described below
commit f658199c32c81fad65665d7a66b5b197cd57673d
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Dec 31 14:20:20 2025 +0900
GH-48642: [Ruby] Add support for reading decimal128 array (#48643)
### Rationale for this change
Decimal128 is the 128-bit variant of the decimal array.
### What changes are included in this PR?
* Add `ArrowFormat::Decimal128Type`
* Add `ArrowFormat::Decimal128Array`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #48642
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ruby/red-arrow-format/lib/arrow-format/array.rb | 37 ++++
ruby/red-arrow-format/lib/arrow-format/readable.rb | 6 +
ruby/red-arrow-format/lib/arrow-format/type.rb | 206 ++++++++++++++-------
ruby/red-arrow-format/test/test-reader.rb | 22 +++
4 files changed, 208 insertions(+), 63 deletions(-)
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb
b/ruby/red-arrow-format/lib/arrow-format/array.rb
index ac96038f19..6474869c30 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -14,6 +14,8 @@
# specific language governing permissions and limitations
# under the License.
+require "bigdecimal"
+
require_relative "bitmap"
module ArrowFormat
@@ -306,6 +308,41 @@ module ArrowFormat
end
end
+ # Common base class for decimal arrays stored as fixed-size binary values.
+ class DecimalArray < FixedSizeBinaryArray
+ end
+
+ # Array of 128-bit decimals. Each element is read as two 64-bit words
+ # (unsigned low word, signed high word) that together form the unscaled
+ # integer value; the type's scale positions the decimal point.
+ class Decimal128Array < DecimalArray
+   # Decodes every element into a BigDecimal.
+   #
+   # Returns a Ruby array with one entry per element. Entries for which
+   # apply_validity yields nil stay nil.
+   def to_a
+     byte_width = @type.byte_width
+     buffer_types = [:u64, :s64] # TODO: big endian support
+     values = 0.step(@size * byte_width - 1, byte_width).collect do |offset|
+       @values_buffer.get_values(buffer_types, offset)
+     end
+     apply_validity(values).collect do |value|
+       if value.nil?
+         nil
+       else
+         low, high = value
+         BigDecimal(format_value(low, high))
+       end
+     end
+   end
+
+   private
+   # Renders the unscaled 128-bit integer as a decimal string that
+   # BigDecimal() can parse.
+   #
+   # low::  unsigned lower 64 bits
+   # high:: signed upper 64 bits (carries the sign)
+   #
+   # The magnitude is formatted first and the sign is added afterwards so
+   # that zero padding never ends up in front of the "-". Padding goes on
+   # the LEFT: padding on the right would multiply short values by powers
+   # of ten (for example 1234 with scale 3 must become "1.234").
+   def format_value(low, high)
+     value = (high << 64) + low
+     scale = @type.scale
+     digits = value.abs.to_s
+     if scale < 0
+       # Negative scale: the stored integer is in units of 10**(-scale).
+       digits << ("0" * -scale)
+     elsif scale > 0
+       # Guarantee at least one digit before the decimal point.
+       digits = digits.rjust(scale + 1, "0")
+       digits[-scale, 0] = "."
+     end
+     value < 0 ? "-#{digits}" : digits
+   end
+ end
+
+
class VariableSizeListArray < Array
def initialize(type, size, validity_buffer, offsets_buffer, child)
super(type, size, validity_buffer)
diff --git a/ruby/red-arrow-format/lib/arrow-format/readable.rb
b/ruby/red-arrow-format/lib/arrow-format/readable.rb
index 2d64d5387f..5a247c822a 100644
--- a/ruby/red-arrow-format/lib/arrow-format/readable.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/readable.rb
@@ -25,6 +25,7 @@ require_relative "org/apache/arrow/flatbuf/binary"
require_relative "org/apache/arrow/flatbuf/bool"
require_relative "org/apache/arrow/flatbuf/date"
require_relative "org/apache/arrow/flatbuf/date_unit"
+require_relative "org/apache/arrow/flatbuf/decimal"
require_relative "org/apache/arrow/flatbuf/duration"
require_relative "org/apache/arrow/flatbuf/fixed_size_binary"
require_relative "org/apache/arrow/flatbuf/floating_point"
@@ -166,6 +167,11 @@ module ArrowFormat
type = LargeUTF8Type.singleton
when Org::Apache::Arrow::Flatbuf::FixedSizeBinary
type = FixedSizeBinaryType.new(fb_type.byte_width)
+ when Org::Apache::Arrow::Flatbuf::Decimal
+ case fb_type.bit_width
+ when 128
+ type = Decimal128Type.new(fb_type.precision, fb_type.scale)
+ end
end
Field.new(fb_field.name, type, fb_field.nullable?)
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb
b/ruby/red-arrow-format/lib/arrow-format/type.rb
index c667966012..d6d8b7bb81 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -16,10 +16,6 @@
module ArrowFormat
class Type
- attr_reader :name
- def initialize(name)
- @name = name
- end
end
class NullType < Type
@@ -29,8 +25,8 @@ module ArrowFormat
end
end
- def initialize
- super("Null")
+ def name
+ "Null"
end
def build_array(size)
@@ -45,8 +41,8 @@ module ArrowFormat
end
end
- def initialize
- super("Boolean")
+ def name
+ "Boolean"
end
def build_array(size, validity_buffer, values_buffer)
@@ -60,8 +56,8 @@ module ArrowFormat
class IntType < NumberType
attr_reader :bit_width
attr_reader :signed
- def initialize(name, bit_width, signed)
- super(name)
+ def initialize(bit_width, signed)
+ super()
@bit_width = bit_width
@signed = signed
end
@@ -75,7 +71,11 @@ module ArrowFormat
end
def initialize
- super("Int8", 8, true)
+ super(8, true)
+ end
+
+ def name
+ "Int8"
end
def build_array(size, validity_buffer, values_buffer)
@@ -91,7 +91,11 @@ module ArrowFormat
end
def initialize
- super("UInt8", 8, false)
+ super(8, false)
+ end
+
+ def name
+ "UInt8"
end
def build_array(size, validity_buffer, values_buffer)
@@ -107,7 +111,11 @@ module ArrowFormat
end
def initialize
- super("Int16", 16, true)
+ super(16, true)
+ end
+
+ def name
+ "Int16"
end
def build_array(size, validity_buffer, values_buffer)
@@ -123,7 +131,11 @@ module ArrowFormat
end
def initialize
- super("UInt16", 16, false)
+ super(16, false)
+ end
+
+ def name
+ "UInt16"
end
def build_array(size, validity_buffer, values_buffer)
@@ -139,7 +151,11 @@ module ArrowFormat
end
def initialize
- super("Int32", 32, true)
+ super(32, true)
+ end
+
+ def name
+ "Int32"
end
def build_array(size, validity_buffer, values_buffer)
@@ -155,7 +171,11 @@ module ArrowFormat
end
def initialize
- super("UInt32", 32, false)
+ super(32, false)
+ end
+
+ def name
+ "UInt32"
end
def build_array(size, validity_buffer, values_buffer)
@@ -171,7 +191,11 @@ module ArrowFormat
end
def initialize
- super("Int64", 64, true)
+ super(64, true)
+ end
+
+ def name
+ "Int64"
end
def build_array(size, validity_buffer, values_buffer)
@@ -187,7 +211,11 @@ module ArrowFormat
end
def initialize
- super("UInt64", 64, false)
+ super(64, false)
+ end
+
+ def name
+ "UInt64"
end
def build_array(size, validity_buffer, values_buffer)
@@ -197,8 +225,8 @@ module ArrowFormat
class FloatingPointType < NumberType
attr_reader :precision
- def initialize(name, precision)
- super(name)
+ def initialize(precision)
+ super()
@precision = precision
end
end
@@ -211,7 +239,11 @@ module ArrowFormat
end
def initialize
- super("Float32", :single)
+ super(:single)
+ end
+
+ def name
+ "Float32"
end
def build_array(size, validity_buffer, values_buffer)
@@ -227,7 +259,11 @@ module ArrowFormat
end
def initialize
- super("Float64", :double)
+ super(:double)
+ end
+
+ def name
+ "Float64"
end
def build_array(size, validity_buffer, values_buffer)
@@ -248,8 +284,8 @@ module ArrowFormat
end
end
- def initialize
- super("Date32")
+ def name
+ "Date32"
end
def build_array(size, validity_buffer, values_buffer)
@@ -264,8 +300,8 @@ module ArrowFormat
end
end
- def initialize
- super("Date64")
+ def name
+ "Date64"
end
def build_array(size, validity_buffer, values_buffer)
@@ -275,15 +311,15 @@ module ArrowFormat
class TimeType < TemporalType
attr_reader :unit
- def initialize(name, unit)
- super(name)
+ def initialize(unit)
+ super()
@unit = unit
end
end
class Time32Type < TimeType
- def initialize(unit)
- super("Time32", unit)
+ def name
+ "Time32"
end
def build_array(size, validity_buffer, values_buffer)
@@ -292,8 +328,8 @@ module ArrowFormat
end
class Time64Type < TimeType
- def initialize(unit)
- super("Time64", unit)
+ def name
+ "Time64"
end
def build_array(size, validity_buffer, values_buffer)
@@ -305,11 +341,15 @@ module ArrowFormat
attr_reader :unit
attr_reader :timezone
def initialize(unit, timezone)
- super("Timestamp")
+ super()
@unit = unit
@timezone = timezone
end
+ def name
+ "Timestamp"
+ end
+
def build_array(size, validity_buffer, values_buffer)
TimestampArray.new(self, size, validity_buffer, values_buffer)
end
@@ -319,8 +359,8 @@ module ArrowFormat
end
class YearMonthIntervalType < IntervalType
- def initialize
- super("YearMonthInterval")
+ def name
+ "YearMonthInterval"
end
def build_array(size, validity_buffer, values_buffer)
@@ -329,8 +369,8 @@ module ArrowFormat
end
class DayTimeIntervalType < IntervalType
- def initialize
- super("DayTimeInterval")
+ def name
+ "DayTimeInterval"
end
def build_array(size, validity_buffer, values_buffer)
@@ -339,8 +379,8 @@ module ArrowFormat
end
class MonthDayNanoIntervalType < IntervalType
- def initialize
- super("MonthDayNanoInterval")
+ def name
+ "MonthDayNanoInterval"
end
def build_array(size, validity_buffer, values_buffer)
@@ -354,10 +394,14 @@ module ArrowFormat
class DurationType < TemporalType
attr_reader :unit
def initialize(unit)
- super("Duration")
+ super()
@unit = unit
end
+ def name
+ "Duration"
+ end
+
def build_array(size, validity_buffer, values_buffer)
DurationArray.new(self, size, validity_buffer, values_buffer)
end
@@ -373,8 +417,8 @@ module ArrowFormat
end
end
- def initialize
- super("Binary")
+ def name
+ "Binary"
end
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
@@ -389,8 +433,8 @@ module ArrowFormat
end
end
- def initialize
- super("LargeBinary")
+ def name
+ "LargeBinary"
end
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
@@ -409,8 +453,8 @@ module ArrowFormat
end
end
- def initialize
- super("UTF8")
+ def name
+ "UTF8"
end
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
@@ -425,8 +469,8 @@ module ArrowFormat
end
end
- def initialize
- super("LargeUTF8")
+ def name
+ "LargeUTF8"
end
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
@@ -441,27 +485,55 @@ module ArrowFormat
class FixedSizeBinaryType < Type
attr_reader :byte_width
def initialize(byte_width)
- super("FixedSizeBinary")
+ super()
@byte_width = byte_width
end
+ def name
+ "FixedSizeBinary"
+ end
+
def build_array(size, validity_buffer, values_buffer)
FixedSizeBinaryArray.new(self, size, validity_buffer, values_buffer)
end
end
+ # Shared base for decimal types: a fixed-size binary type that also
+ # carries a precision and a scale.
+ class DecimalType < FixedSizeBinaryType
+   attr_reader :precision, :scale
+
+   # byte_width is forwarded to FixedSizeBinaryType; precision and scale
+   # are stored as given.
+   def initialize(byte_width, precision, scale)
+     super(byte_width)
+     @precision, @scale = precision, scale
+   end
+ end
+
+ # Decimal type whose values occupy 16 bytes (128 bits).
+ class Decimal128Type < DecimalType
+   def initialize(precision, scale)
+     byte_width = 16
+     super(byte_width, precision, scale)
+   end
+
+   # Type name as a string.
+   def name
+     "Decimal128"
+   end
+
+   # Wraps the given buffers in a Decimal128Array of this type.
+   def build_array(size, validity_buffer, values_buffer)
+     Decimal128Array.new(self, size, validity_buffer, values_buffer)
+   end
+ end
+
class VariableSizeListType < Type
attr_reader :child
- def initialize(name, child)
- super(name)
+ def initialize(child)
+ super()
@child = child
end
end
class ListType < VariableSizeListType
- def initialize(child)
- super("List", child)
+ def name
+ "List"
end
def build_array(size, validity_buffer, offsets_buffer, child)
@@ -470,8 +542,8 @@ module ArrowFormat
end
class LargeListType < VariableSizeListType
- def initialize(child)
- super("LargeList", child)
+ def name
+ "LargeList"
end
def build_array(size, validity_buffer, offsets_buffer, child)
@@ -482,10 +554,14 @@ module ArrowFormat
class StructType < Type
attr_reader :children
def initialize(children)
- super("Struct")
+ super()
@children = children
end
+ def name
+ "Struct"
+ end
+
def build_array(size, validity_buffer, children)
StructArray.new(self, size, validity_buffer, children)
end
@@ -509,7 +585,11 @@ module ArrowFormat
raise TypeError.new("Map key field must not be nullable: " +
type.children[0].inspect)
end
- super("Map", child)
+ super(child)
+ end
+
+ def name
+ "Map"
end
def build_array(size, validity_buffer, offsets_buffer, child)
@@ -520,8 +600,8 @@ module ArrowFormat
class UnionType < Type
attr_reader :children
attr_reader :type_ids
- def initialize(name, children, type_ids)
- super(name)
+ def initialize(children, type_ids)
+ super()
@children = children
@type_ids = type_ids
@type_indexes = {}
@@ -533,8 +613,8 @@ module ArrowFormat
end
class DenseUnionType < UnionType
- def initialize(children, type_ids)
- super("DenseUnion", children, type_ids)
+ def name
+ "DenseUnion"
end
def build_array(size, types_buffer, offsets_buffer, children)
@@ -543,8 +623,8 @@ module ArrowFormat
end
class SparseUnionType < UnionType
- def initialize(children, type_ids)
- super("SparseUnion", children, type_ids)
+ def name
+ "SparseUnion"
end
def build_array(size, types_buffer, children)
diff --git a/ruby/red-arrow-format/test/test-reader.rb
b/ruby/red-arrow-format/test/test-reader.rb
index 8095adfd50..cddcea484f 100644
--- a/ruby/red-arrow-format/test/test-reader.rb
+++ b/ruby/red-arrow-format/test/test-reader.rb
@@ -674,6 +674,28 @@ module ReaderTests
end
end
+ sub_test_case("Decimal128") do
+   # Three elements: a positive value using all 38 digits, a null, and a
+   # negative value.
+   def build_array
+     @positive = "12345678901234567890123456789012345.678"
+     @negative = "-12345678901234567890123456789012345.67"
+     data_type = {precision: 38, scale: 3}
+     Arrow::Decimal128Array.new(data_type, [@positive, nil, @negative])
+   end
+
+   # The values read back must equal the BigDecimal form of the inputs.
+   def test_read
+     expected_values = [@positive, nil, @negative].collect do |text|
+       text && BigDecimal(text)
+     end
+     assert_equal([{"value" => expected_values}], read)
+   end
+ end
+
sub_test_case("List") do
def build_array
data_type = Arrow::ListDataType.new(name: "count", type: :int8)