This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 46c6eecbd0 GH-45891: [Ruby] Unify test for dictionary array in raw_records and each_raw_record (#45927)
46c6eecbd0 is described below

commit 46c6eecbd0bebfe0372e291a6dc7da95d7a5d246
Author: takuya kodama <[email protected]>
AuthorDate: Wed Mar 26 08:39:03 2025 +0800

    GH-45891: [Ruby] Unify test for dictionary array in raw_records and each_raw_record (#45927)
    
    ### Rationale for this change
    
    This PR reduces duplicated test cases and ensures that `raw_records` and `each_raw_record` behave consistently by extracting their common test cases. The shared test cases cover the following methods:
    - `Arrow::RecordBatch#each_raw_record`
    - `Arrow::Table#each_raw_record`
    - `Arrow::RecordBatch#raw_records`
    - `Arrow::Table#raw_records`
    
    ### What changes are included in this PR?
    
    We extracted the dictionary array test cases shared by `raw_records` and `each_raw_record` into a single module; each test class supplies an `actual_records` method that selects the API under test.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #45891
    
    Authored-by: otegami <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 .../test/each-raw-record/test-dictionary-array.rb  | 341 ---------------------
 .../test/raw-records/test-dictionary-array.rb      |  90 ++++--
 2 files changed, 61 insertions(+), 370 deletions(-)

diff --git a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb 
b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb
deleted file mode 100644
index f85294b30a..0000000000
--- a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb
+++ /dev/null
@@ -1,341 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-module EachRawRecordDictionaryArrayTests
-  def build_record_batch(array)
-    dictionary = array.dictionary_encode
-    schema = Arrow::Schema.new(column: dictionary.value_data_type)
-    Arrow::RecordBatch.new(schema, array.length, [dictionary])
-  end
-
-  def test_null
-    records = [
-      [nil],
-      [nil],
-      [nil],
-      [nil],
-    ]
-    target = build(Arrow::NullArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_boolean
-    records = [
-      [true],
-      [nil],
-      [false],
-    ]
-    target = build(Arrow::BooleanArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_int8
-    records = [
-      [-(2 ** 7)],
-      [nil],
-      [(2 ** 7) - 1],
-    ]
-    target = build(Arrow::Int8Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_uint8
-    records = [
-      [0],
-      [nil],
-      [(2 ** 8) - 1],
-    ]
-    target = build(Arrow::UInt8Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_int16
-    records = [
-      [-(2 ** 15)],
-      [nil],
-      [(2 ** 15) - 1],
-    ]
-    target = build(Arrow::Int16Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_uint16
-    records = [
-      [0],
-      [nil],
-      [(2 ** 16) - 1],
-    ]
-    target = build(Arrow::UInt16Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_int32
-    records = [
-      [-(2 ** 31)],
-      [nil],
-      [(2 ** 31) - 1],
-    ]
-    target = build(Arrow::Int32Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_uint32
-    records = [
-      [0],
-      [nil],
-      [(2 ** 32) - 1],
-    ]
-    target = build(Arrow::UInt32Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_int64
-    records = [
-      [-(2 ** 63)],
-      [nil],
-      [(2 ** 63) - 1],
-    ]
-    target = build(Arrow::Int64Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_uint64
-    records = [
-      [0],
-      [nil],
-      [(2 ** 64) - 1],
-    ]
-    target = build(Arrow::UInt64Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_float
-    records = [
-      [-1.0],
-      [nil],
-      [1.0],
-    ]
-    target = build(Arrow::FloatArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_double
-    records = [
-      [-1.0],
-      [nil],
-      [1.0],
-    ]
-    target = build(Arrow::DoubleArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_binary
-    records = [
-      ["\x00".b],
-      [nil],
-      ["\xff".b],
-    ]
-    target = build(Arrow::BinaryArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_string
-    records = [
-      ["Ruby"],
-      [nil],
-      ["\u3042"], # U+3042 HIRAGANA LETTER A
-    ]
-    target = build(Arrow::StringArray.new(records.collect(&:first)))    
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_date32
-    records = [
-      [Date.new(1960, 1, 1)],
-      [nil],
-      [Date.new(2017, 8, 23)],
-    ]
-    target = build(Arrow::Date32Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_date64
-    records = [
-      [DateTime.new(1960, 1, 1, 2, 9, 30)],
-      [nil],
-      [DateTime.new(2017, 8, 23, 14, 57, 2)],
-    ]
-    target = build(Arrow::Date64Array.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_timestamp_second
-    records = [
-      [Time.parse("1960-01-01T02:09:30Z")],
-      [nil],
-      [Time.parse("2017-08-23T14:57:02Z")],
-    ]
-    target = build(Arrow::TimestampArray.new(:second, 
records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_timestamp_milli
-    records = [
-      [Time.parse("1960-01-01T02:09:30.123Z")],
-      [nil],
-      [Time.parse("2017-08-23T14:57:02.987Z")],
-    ]
-    target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_timestamp_micro
-    records = [
-      [Time.parse("1960-01-01T02:09:30.123456Z")],
-      [nil],
-      [Time.parse("2017-08-23T14:57:02.987654Z")],
-    ]
-    target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_timestamp_nano
-    records = [
-      [Time.parse("1960-01-01T02:09:30.123456789Z")],
-      [nil],
-      [Time.parse("2017-08-23T14:57:02.987654321Z")],
-    ]
-    target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_time32_second
-    unit = Arrow::TimeUnit::SECOND
-    records = [
-      [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
-      [nil],
-      [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
-    ]
-    target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_time32_milli
-    unit = Arrow::TimeUnit::MILLI
-    records = [
-      [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
-      [nil],
-      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
-    ]
-    target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_time64_micro
-    unit = Arrow::TimeUnit::MICRO
-    records = [
-      # 00:10:00.123456
-      [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
-      [nil],
-      # 02:00:09.987654
-      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
-    ]
-    target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_time64_nano
-    unit = Arrow::TimeUnit::NANO
-    records = [
-      # 00:10:00.123456789
-      [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
-      [nil],
-      # 02:00:09.987654321
-      [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
-    ]
-    target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_decimal128
-    records = [
-      [BigDecimal("92.92")],
-      [nil],
-      [BigDecimal("29.29")],
-    ]
-    data_type = Arrow::Decimal128DataType.new(8, 2)
-    target = build(Arrow::Decimal128Array.new(data_type, 
records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_decimal256
-    records = [
-      [BigDecimal("92.92")],
-      [nil],
-      [BigDecimal("29.29")],
-    ]
-    data_type = Arrow::Decimal256DataType.new(38, 2)
-    target = build(Arrow::Decimal256Array.new(data_type, 
records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_month_interval
-    records = [
-      [1],
-      [nil],
-      [12],
-    ]
-    target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_day_time_interval
-    records = [
-      [{day: 1, millisecond: 100}],
-      [nil],
-      [{day: 2, millisecond: 300}],
-    ]
-    target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-
-  def test_month_day_nano_interval
-    records = [
-      [{month: 1, day: 1, nanosecond: 100}],
-      [nil],
-      [{month: 2, day: 3, nanosecond: 400}],
-    ]
-    target = 
build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.each_raw_record.to_a)
-  end
-end
-
-class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
-  include EachRawRecordDictionaryArrayTests
-
-  def build(array)
-    build_record_batch(array)
-  end
-end
-
-class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
-  include EachRawRecordDictionaryArrayTests
-
-  def build(array)
-    build_record_batch(array).to_table
-  end
-end
diff --git a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb 
b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
index 200185bde4..abe946a8eb 100644
--- a/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-dictionary-array.rb
@@ -30,7 +30,7 @@ module RawRecordsDictionaryArrayTests
       [nil],
     ]
     target = build(Arrow::NullArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_boolean
@@ -40,7 +40,7 @@ module RawRecordsDictionaryArrayTests
       [false],
     ]
     target = build(Arrow::BooleanArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_int8
@@ -50,7 +50,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 7) - 1],
     ]
     target = build(Arrow::Int8Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_uint8
@@ -60,7 +60,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 8) - 1],
     ]
     target = build(Arrow::UInt8Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_int16
@@ -70,7 +70,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 15) - 1],
     ]
     target = build(Arrow::Int16Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_uint16
@@ -80,7 +80,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 16) - 1],
     ]
     target = build(Arrow::UInt16Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_int32
@@ -90,7 +90,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 31) - 1],
     ]
     target = build(Arrow::Int32Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_uint32
@@ -100,7 +100,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 32) - 1],
     ]
     target = build(Arrow::UInt32Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_int64
@@ -110,7 +110,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 63) - 1],
     ]
     target = build(Arrow::Int64Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_uint64
@@ -120,7 +120,7 @@ module RawRecordsDictionaryArrayTests
       [(2 ** 64) - 1],
     ]
     target = build(Arrow::UInt64Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_float
@@ -130,7 +130,7 @@ module RawRecordsDictionaryArrayTests
       [1.0],
     ]
     target = build(Arrow::FloatArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_double
@@ -140,7 +140,7 @@ module RawRecordsDictionaryArrayTests
       [1.0],
     ]
     target = build(Arrow::DoubleArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_binary
@@ -150,7 +150,7 @@ module RawRecordsDictionaryArrayTests
       ["\xff".b],
     ]
     target = build(Arrow::BinaryArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_string
@@ -160,7 +160,7 @@ module RawRecordsDictionaryArrayTests
       ["\u3042"], # U+3042 HIRAGANA LETTER A
     ]
     target = build(Arrow::StringArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_date32
@@ -170,7 +170,7 @@ module RawRecordsDictionaryArrayTests
       [Date.new(2017, 8, 23)],
     ]
     target = build(Arrow::Date32Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_date64
@@ -180,7 +180,7 @@ module RawRecordsDictionaryArrayTests
       [DateTime.new(2017, 8, 23, 14, 57, 2)],
     ]
     target = build(Arrow::Date64Array.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_timestamp_second
@@ -190,7 +190,7 @@ module RawRecordsDictionaryArrayTests
       [Time.parse("2017-08-23T14:57:02Z")],
     ]
     target = build(Arrow::TimestampArray.new(:second, 
records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_timestamp_milli
@@ -200,7 +200,7 @@ module RawRecordsDictionaryArrayTests
       [Time.parse("2017-08-23T14:57:02.987Z")],
     ]
     target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_timestamp_micro
@@ -210,7 +210,7 @@ module RawRecordsDictionaryArrayTests
       [Time.parse("2017-08-23T14:57:02.987654Z")],
     ]
     target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_timestamp_nano
@@ -220,7 +220,7 @@ module RawRecordsDictionaryArrayTests
       [Time.parse("2017-08-23T14:57:02.987654321Z")],
     ]
     target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_time32_second
@@ -231,7 +231,7 @@ module RawRecordsDictionaryArrayTests
       [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
     ]
     target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_time32_milli
@@ -242,7 +242,7 @@ module RawRecordsDictionaryArrayTests
       [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
     ]
     target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_time64_micro
@@ -255,7 +255,7 @@ module RawRecordsDictionaryArrayTests
       [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
     ]
     target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_time64_nano
@@ -268,7 +268,7 @@ module RawRecordsDictionaryArrayTests
       [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
     ]
     target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_decimal128
@@ -279,7 +279,7 @@ module RawRecordsDictionaryArrayTests
     ]
     data_type = Arrow::Decimal128DataType.new(8, 2)
     target = build(Arrow::Decimal128Array.new(data_type, 
records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_decimal256
@@ -290,7 +290,7 @@ module RawRecordsDictionaryArrayTests
     ]
     data_type = Arrow::Decimal256DataType.new(38, 2)
     target = build(Arrow::Decimal256Array.new(data_type, 
records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_month_interval
@@ -300,7 +300,7 @@ module RawRecordsDictionaryArrayTests
       [12],
     ]
     target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_day_time_interval
@@ -310,7 +310,7 @@ module RawRecordsDictionaryArrayTests
       [{day: 2, millisecond: 300}],
     ]
     target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
   end
 
   def test_month_day_nano_interval
@@ -320,7 +320,31 @@ module RawRecordsDictionaryArrayTests
       [{month: 2, day: 3, nanosecond: 400}],
     ]
     target = 
build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
-    assert_equal(records, target.raw_records)
+    assert_equal(records, actual_records(target))
+  end
+end
+
+class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
+  include RawRecordsDictionaryArrayTests
+
+  def build(array)
+    build_record_batch(array)
+  end
+
+  def actual_records(target)
+    target.each_raw_record.to_a
+  end
+end
+
+class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
+  include RawRecordsDictionaryArrayTests
+
+  def build(array)
+    build_record_batch(array).to_table
+  end
+
+  def actual_records(target)
+    target.each_raw_record.to_a
   end
 end
 
@@ -330,6 +354,10 @@ class RawRecordsRecordBatchDictionaryArraysTest < 
Test::Unit::TestCase
   def build(array)
     build_record_batch(array)
   end
+
+  def actual_records(target)
+    target.raw_records
+  end
 end
 
 class RawRecordsTableDictionaryArraysTest < Test::Unit::TestCase
@@ -338,4 +366,8 @@ class RawRecordsTableDictionaryArraysTest < 
Test::Unit::TestCase
   def build(array)
     build_record_batch(array).to_table
   end
+
+  def actual_records(target)
+    target.raw_records
+  end
 end

Reply via email to