This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 46c6eecbd0 GH-45891: [Ruby] Unify test for dictionary array in
raw_records and each_raw_record (#45927)
46c6eecbd0 is described below
commit 46c6eecbd0bebfe0372e291a6dc7da95d7a5d246
Author: takuya kodama <[email protected]>
AuthorDate: Wed Mar 26 08:39:03 2025 +0800
GH-45891: [Ruby] Unify test for dictionary array in raw_records and
each_raw_record (#45927)
### Rationale for this change
This PR removes duplicated test cases and ensures that `raw_records` and
`each_raw_record` behave consistently by running the same shared test cases
against each of the following methods:
- `Arrow::RecordBatch#each_raw_record`
- `Arrow::Table#each_raw_record`
- `Arrow::RecordBatch#raw_records`
- `Arrow::Table#raw_records`
### What changes are included in this PR?
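We extracted the dictionary array test cases shared by `raw_records` and
`each_raw_record` into a single module. Each test class now includes that
module and only defines `build` (how to construct the target) and
`actual_records` (which method is used to read the records).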
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* GitHub Issue: #45891
Authored-by: otegami <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
.../test/each-raw-record/test-dictionary-array.rb | 341 ---------------------
.../test/raw-records/test-dictionary-array.rb | 90 ++++--
2 files changed, 61 insertions(+), 370 deletions(-)
diff --git a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb
b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb
deleted file mode 100644
index f85294b30a..0000000000
--- a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb
+++ /dev/null
@@ -1,341 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-module EachRawRecordDictionaryArrayTests
- def build_record_batch(array)
- dictionary = array.dictionary_encode
- schema = Arrow::Schema.new(column: dictionary.value_data_type)
- Arrow::RecordBatch.new(schema, array.length, [dictionary])
- end
-
- def test_null
- records = [
- [nil],
- [nil],
- [nil],
- [nil],
- ]
- target = build(Arrow::NullArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_boolean
- records = [
- [true],
- [nil],
- [false],
- ]
- target = build(Arrow::BooleanArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_int8
- records = [
- [-(2 ** 7)],
- [nil],
- [(2 ** 7) - 1],
- ]
- target = build(Arrow::Int8Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_uint8
- records = [
- [0],
- [nil],
- [(2 ** 8) - 1],
- ]
- target = build(Arrow::UInt8Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_int16
- records = [
- [-(2 ** 15)],
- [nil],
- [(2 ** 15) - 1],
- ]
- target = build(Arrow::Int16Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_uint16
- records = [
- [0],
- [nil],
- [(2 ** 16) - 1],
- ]
- target = build(Arrow::UInt16Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_int32
- records = [
- [-(2 ** 31)],
- [nil],
- [(2 ** 31) - 1],
- ]
- target = build(Arrow::Int32Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_uint32
- records = [
- [0],
- [nil],
- [(2 ** 32) - 1],
- ]
- target = build(Arrow::UInt32Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_int64
- records = [
- [-(2 ** 63)],
- [nil],
- [(2 ** 63) - 1],
- ]
- target = build(Arrow::Int64Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_uint64
- records = [
- [0],
- [nil],
- [(2 ** 64) - 1],
- ]
- target = build(Arrow::UInt64Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_float
- records = [
- [-1.0],
- [nil],
- [1.0],
- ]
- target = build(Arrow::FloatArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_double
- records = [
- [-1.0],
- [nil],
- [1.0],
- ]
- target = build(Arrow::DoubleArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_binary
- records = [
- ["\x00".b],
- [nil],
- ["\xff".b],
- ]
- target = build(Arrow::BinaryArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_string
- records = [
- ["Ruby"],
- [nil],
- ["\u3042"], # U+3042 HIRAGANA LETTER A
- ]
- target = build(Arrow::StringArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_date32
- records = [
- [Date.new(1960, 1, 1)],
- [nil],
- [Date.new(2017, 8, 23)],
- ]
- target = build(Arrow::Date32Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_date64
- records = [
- [DateTime.new(1960, 1, 1, 2, 9, 30)],
- [nil],
- [DateTime.new(2017, 8, 23, 14, 57, 2)],
- ]
- target = build(Arrow::Date64Array.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_timestamp_second
- records = [
- [Time.parse("1960-01-01T02:09:30Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02Z")],
- ]
- target = build(Arrow::TimestampArray.new(:second,
records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_timestamp_milli
- records = [
- [Time.parse("1960-01-01T02:09:30.123Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987Z")],
- ]
- target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_timestamp_micro
- records = [
- [Time.parse("1960-01-01T02:09:30.123456Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987654Z")],
- ]
- target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_timestamp_nano
- records = [
- [Time.parse("1960-01-01T02:09:30.123456789Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987654321Z")],
- ]
- target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_time32_second
- unit = Arrow::TimeUnit::SECOND
- records = [
- [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
- [nil],
- [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
- ]
- target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_time32_milli
- unit = Arrow::TimeUnit::MILLI
- records = [
- [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
- [nil],
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
- ]
- target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_time64_micro
- unit = Arrow::TimeUnit::MICRO
- records = [
- # 00:10:00.123456
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
- [nil],
- # 02:00:09.987654
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
- ]
- target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_time64_nano
- unit = Arrow::TimeUnit::NANO
- records = [
- # 00:10:00.123456789
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
- [nil],
- # 02:00:09.987654321
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
- ]
- target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_decimal128
- records = [
- [BigDecimal("92.92")],
- [nil],
- [BigDecimal("29.29")],
- ]
- data_type = Arrow::Decimal128DataType.new(8, 2)
- target = build(Arrow::Decimal128Array.new(data_type,
records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_decimal256
- records = [
- [BigDecimal("92.92")],
- [nil],
- [BigDecimal("29.29")],
- ]
- data_type = Arrow::Decimal256DataType.new(38, 2)
- target = build(Arrow::Decimal256Array.new(data_type,
records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_month_interval
- records = [
- [1],
- [nil],
- [12],
- ]
- target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_day_time_interval
- records = [
- [{day: 1, millisecond: 100}],
- [nil],
- [{day: 2, millisecond: 300}],
- ]
- target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-
- def test_month_day_nano_interval
- records = [
- [{month: 1, day: 1, nanosecond: 100}],
- [nil],
- [{month: 2, day: 3, nanosecond: 400}],
- ]
- target =
build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.each_raw_record.to_a)
- end
-end
-
-class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
- include EachRawRecordDictionaryArrayTests
-
- def build(array)
- build_record_batch(array)
- end
-end
-
-class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
- include EachRawRecordDictionaryArrayTests
-
- def build(array)
- build_record_batch(array).to_table
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
index 200185bde4..abe946a8eb 100644
--- a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
@@ -30,7 +30,7 @@ module RawRecordsDictionaryArrayTests
[nil],
]
target = build(Arrow::NullArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_boolean
@@ -40,7 +40,7 @@ module RawRecordsDictionaryArrayTests
[false],
]
target = build(Arrow::BooleanArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_int8
@@ -50,7 +50,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 7) - 1],
]
target = build(Arrow::Int8Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_uint8
@@ -60,7 +60,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 8) - 1],
]
target = build(Arrow::UInt8Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_int16
@@ -70,7 +70,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 15) - 1],
]
target = build(Arrow::Int16Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_uint16
@@ -80,7 +80,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 16) - 1],
]
target = build(Arrow::UInt16Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_int32
@@ -90,7 +90,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 31) - 1],
]
target = build(Arrow::Int32Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_uint32
@@ -100,7 +100,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 32) - 1],
]
target = build(Arrow::UInt32Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_int64
@@ -110,7 +110,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 63) - 1],
]
target = build(Arrow::Int64Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_uint64
@@ -120,7 +120,7 @@ module RawRecordsDictionaryArrayTests
[(2 ** 64) - 1],
]
target = build(Arrow::UInt64Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_float
@@ -130,7 +130,7 @@ module RawRecordsDictionaryArrayTests
[1.0],
]
target = build(Arrow::FloatArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_double
@@ -140,7 +140,7 @@ module RawRecordsDictionaryArrayTests
[1.0],
]
target = build(Arrow::DoubleArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_binary
@@ -150,7 +150,7 @@ module RawRecordsDictionaryArrayTests
["\xff".b],
]
target = build(Arrow::BinaryArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_string
@@ -160,7 +160,7 @@ module RawRecordsDictionaryArrayTests
["\u3042"], # U+3042 HIRAGANA LETTER A
]
target = build(Arrow::StringArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_date32
@@ -170,7 +170,7 @@ module RawRecordsDictionaryArrayTests
[Date.new(2017, 8, 23)],
]
target = build(Arrow::Date32Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_date64
@@ -180,7 +180,7 @@ module RawRecordsDictionaryArrayTests
[DateTime.new(2017, 8, 23, 14, 57, 2)],
]
target = build(Arrow::Date64Array.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_timestamp_second
@@ -190,7 +190,7 @@ module RawRecordsDictionaryArrayTests
[Time.parse("2017-08-23T14:57:02Z")],
]
target = build(Arrow::TimestampArray.new(:second,
records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_timestamp_milli
@@ -200,7 +200,7 @@ module RawRecordsDictionaryArrayTests
[Time.parse("2017-08-23T14:57:02.987Z")],
]
target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_timestamp_micro
@@ -210,7 +210,7 @@ module RawRecordsDictionaryArrayTests
[Time.parse("2017-08-23T14:57:02.987654Z")],
]
target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_timestamp_nano
@@ -220,7 +220,7 @@ module RawRecordsDictionaryArrayTests
[Time.parse("2017-08-23T14:57:02.987654321Z")],
]
target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_time32_second
@@ -231,7 +231,7 @@ module RawRecordsDictionaryArrayTests
[Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
]
target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_time32_milli
@@ -242,7 +242,7 @@ module RawRecordsDictionaryArrayTests
[Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
]
target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_time64_micro
@@ -255,7 +255,7 @@ module RawRecordsDictionaryArrayTests
[Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
]
target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_time64_nano
@@ -268,7 +268,7 @@ module RawRecordsDictionaryArrayTests
[Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
]
target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_decimal128
@@ -279,7 +279,7 @@ module RawRecordsDictionaryArrayTests
]
data_type = Arrow::Decimal128DataType.new(8, 2)
target = build(Arrow::Decimal128Array.new(data_type,
records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_decimal256
@@ -290,7 +290,7 @@ module RawRecordsDictionaryArrayTests
]
data_type = Arrow::Decimal256DataType.new(38, 2)
target = build(Arrow::Decimal256Array.new(data_type,
records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_month_interval
@@ -300,7 +300,7 @@ module RawRecordsDictionaryArrayTests
[12],
]
target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_day_time_interval
@@ -310,7 +310,7 @@ module RawRecordsDictionaryArrayTests
[{day: 2, millisecond: 300}],
]
target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
end
def test_month_day_nano_interval
@@ -320,7 +320,31 @@ module RawRecordsDictionaryArrayTests
[{month: 2, day: 3, nanosecond: 400}],
]
target =
build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
- assert_equal(records, target.raw_records)
+ assert_equal(records, actual_records(target))
+ end
+end
+
+class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
+ include RawRecordsDictionaryArrayTests
+
+ def build(array)
+ build_record_batch(array)
+ end
+
+ def actual_records(target)
+ target.each_raw_record.to_a
+ end
+end
+
+class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
+ include RawRecordsDictionaryArrayTests
+
+ def build(array)
+ build_record_batch(array).to_table
+ end
+
+ def actual_records(target)
+ target.each_raw_record.to_a
end
end
@@ -330,6 +354,10 @@ class RawRecordsRecordBatchDictionaryArraysTest <
Test::Unit::TestCase
def build(array)
build_record_batch(array)
end
+
+ def actual_records(target)
+ target.raw_records
+ end
end
class RawRecordsTableDictionaryArraysTest < Test::Unit::TestCase
@@ -338,4 +366,8 @@ class RawRecordsTableDictionaryArraysTest <
Test::Unit::TestCase
def build(array)
build_record_batch(array).to_table
end
+
+ def actual_records(target)
+ target.raw_records
+ end
end