This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d510b105c9 GH-49053: [Ruby] Add support for writing timestamp array
(#49054)
d510b105c9 is described below
commit d510b105c991bc2f87a6eb622212e462c249434b
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Jan 30 09:26:44 2026 +0900
GH-49053: [Ruby] Add support for writing timestamp array (#49054)
### Rationale for this change
A timestamp type has `unit` and `time_zone` parameters, and both must be written out as part of the type.
### What changes are included in this PR?
* Add `ArrowFormat::TimestampType#to_flatbuffers`
* Set the time zone when a GLib timestamp type is converted from a C++
timestamp type
* Use `time_zone` instead of `timezone` as the name
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #49053
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/basic-data-type.cpp | 28 ++++-
ruby/red-arrow-format/lib/arrow-format/type.rb | 13 ++-
ruby/red-arrow-format/test/test-reader.rb | 38 +++---
ruby/red-arrow-format/test/test-writer.rb | 155 +++++++++++++++++++++++--
4 files changed, 197 insertions(+), 37 deletions(-)
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 9b77e87422..87c5eed530 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1165,13 +1165,13 @@ GArrowTimestampDataType *
garrow_timestamp_data_type_new(GArrowTimeUnit unit, GTimeZone *time_zone)
{
auto arrow_unit = garrow_time_unit_to_raw(unit);
- std::string arrow_timezone;
+ std::string arrow_time_zone;
#if GLIB_CHECK_VERSION(2, 58, 0)
if (time_zone) {
- arrow_timezone = g_time_zone_get_identifier(time_zone);
+ arrow_time_zone = g_time_zone_get_identifier(time_zone);
}
#endif
- auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_timezone);
+ auto arrow_data_type = arrow::timestamp(arrow_unit, arrow_time_zone);
auto data_type =
GARROW_TIMESTAMP_DATA_TYPE(g_object_new(GARROW_TYPE_TIMESTAMP_DATA_TYPE,
"data-type",
@@ -2645,6 +2645,28 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
break;
case arrow::Type::type::TIMESTAMP:
type = GARROW_TYPE_TIMESTAMP_DATA_TYPE;
+ {
+ auto arrow_timestamp_data_type =
+ std::static_pointer_cast<arrow::TimestampType>(*arrow_data_type);
+ const auto &arrow_time_zone = arrow_timestamp_data_type->timezone();
+ if (!arrow_time_zone.empty()) {
+#if GLIB_CHECK_VERSION(2, 68, 0)
+ auto time_zone = g_time_zone_new_identifier(arrow_time_zone.c_str());
+#else
+ auto time_zone = g_time_zone_new(arrow_time_zone.c_str());
+#endif
+ data_type = GARROW_DATA_TYPE(g_object_new(type,
+ "data-type",
+ arrow_data_type,
+ "time-zone",
+ time_zone,
+ nullptr));
+ if (time_zone) {
+ g_time_zone_unref(time_zone);
+ }
+ return data_type;
+ }
+ }
break;
case arrow::Type::type::TIME32:
type = GARROW_TYPE_TIME32_DATA_TYPE;
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index bfb0d3803a..fd7582a776 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -434,11 +434,11 @@ module ArrowFormat
class TimestampType < TemporalType
attr_reader :unit
- attr_reader :timezone
- def initialize(unit, timezone)
+ attr_reader :time_zone
+ def initialize(unit, time_zone)
super()
@unit = unit
- @timezone = timezone
+ @time_zone = time_zone
end
def name
@@ -448,6 +448,13 @@ module ArrowFormat
def build_array(size, validity_buffer, values_buffer)
TimestampArray.new(self, size, validity_buffer, values_buffer)
end
+
+ def to_flatbuffers
+ fb_type = FB::Timestamp::Data.new
+ fb_type.unit = FB::TimeUnit.try_convert(@unit.to_s.upcase)
+ fb_type.timezone = @time_zone
+ fb_type
+ end
end
class IntervalType < TemporalType
diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb
index a5919c3fb9..e2e27d3dbc 100644
--- a/ruby/red-arrow-format/test/test-reader.rb
+++ b/ruby/red-arrow-format/test/test-reader.rb
@@ -351,7 +351,7 @@ module ReaderTests
sub_test_case("Timestamp(:second)") do
def setup(&block)
- @timestamp_2019_11_18_00_09_11 = 1574003351
+ @timestamp_2019_11_17_15_09_11 = 1574003351
@timestamp_2025_12_16_05_33_58 = 1765863238
super(&block)
end
@@ -359,7 +359,7 @@ module ReaderTests
def build_array
Arrow::TimestampArray.new(:second,
[
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
])
@@ -369,7 +369,7 @@ module ReaderTests
assert_equal([
{
"value" => [
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
],
@@ -381,7 +381,7 @@ module ReaderTests
sub_test_case("Timestamp(:millisecond)") do
def setup(&block)
- @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
super(&block)
end
@@ -389,7 +389,7 @@ module ReaderTests
def build_array
Arrow::TimestampArray.new(:milli,
[
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
])
@@ -399,7 +399,7 @@ module ReaderTests
assert_equal([
{
"value" => [
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
],
@@ -411,7 +411,7 @@ module ReaderTests
sub_test_case("Timestamp(:microsecond)") do
def setup(&block)
- @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
super(&block)
end
@@ -419,7 +419,7 @@ module ReaderTests
def build_array
Arrow::TimestampArray.new(:micro,
[
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
])
@@ -429,7 +429,7 @@ module ReaderTests
assert_equal([
{
"value" => [
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
],
@@ -441,7 +441,7 @@ module ReaderTests
sub_test_case("Timestamp(:nanosecond)") do
def setup(&block)
- @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000_000
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
super(&block)
end
@@ -449,7 +449,7 @@ module ReaderTests
def build_array
Arrow::TimestampArray.new(:nano,
[
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
])
@@ -459,7 +459,7 @@ module ReaderTests
assert_equal([
{
"value" => [
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
],
@@ -469,27 +469,27 @@ module ReaderTests
end
end
- sub_test_case("Timestamp(timezone)") do
+ sub_test_case("Timestamp(time_zone)") do
def setup(&block)
- @timezone = "UTC"
- @timestamp_2019_11_18_00_09_11 = 1574003351
+ @time_zone = "UTC"
+ @timestamp_2019_11_17_15_09_11 = 1574003351
@timestamp_2025_12_16_05_33_58 = 1765863238
super(&block)
end
def build_array
- data_type = Arrow::TimestampDataType.new(:second, @timezone)
+ data_type = Arrow::TimestampDataType.new(:second, @time_zone)
Arrow::TimestampArray.new(data_type,
[
- @timestamp_2019_11_18_00_09_11,
+ @timestamp_2019_11_17_15_09_11,
nil,
@timestamp_2025_12_16_05_33_58,
])
end
def test_type
- assert_equal([:second, @timezone],
- [type.unit, type.timezone])
+ assert_equal([:second, @time_zone],
+ [type.unit, type.time_zone])
end
end
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index 4e60aadc3d..c440bc4a59 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -16,6 +16,14 @@
# under the License.
module WriterTests
+ def convert_time_unit(red_arrow_time_unit)
+ if red_arrow_time_unit.nick == "second"
+ red_arrow_time_unit.nick.to_sym
+ else
+ :"#{red_arrow_time_unit.nick}second"
+ end
+ end
+
def convert_type(red_arrow_type)
case red_arrow_type
when Arrow::NullDataType
@@ -47,19 +55,12 @@ module WriterTests
when Arrow::Date64DataType
ArrowFormat::Date64Type.singleton
when Arrow::Time32DataType
- case red_arrow_type.unit.nick
- when "second"
- ArrowFormat::Time32Type.new(:second)
- when "milli"
- ArrowFormat::Time32Type.new(:millisecond)
- end
+ ArrowFormat::Time32Type.new(convert_time_unit(red_arrow_type.unit))
when Arrow::Time64DataType
- case red_arrow_type.unit.nick
- when "micro"
- ArrowFormat::Time64Type.new(:microsecond)
- when "nano"
- ArrowFormat::Time64Type.new(:nanosecond)
- end
+ ArrowFormat::Time64Type.new(convert_time_unit(red_arrow_type.unit))
+ when Arrow::TimestampDataType
+ ArrowFormat::TimestampType.new(convert_time_unit(red_arrow_type.unit),
+ red_arrow_type.time_zone&.identifier)
when Arrow::BinaryDataType
ArrowFormat::BinaryType.singleton
when Arrow::LargeBinaryDataType
@@ -394,6 +395,134 @@ module WriterTests
end
end
+ sub_test_case("Timestamp(:second)") do
+ def setup(&block)
+ @timestamp_2019_11_17_15_09_11 = 1574003351
+ @timestamp_2025_12_16_05_33_58 = 1765863238
+ super(&block)
+ end
+
+ def build_array
+ Arrow::TimestampArray.new(:second,
+ [
+ @timestamp_2019_11_17_15_09_11,
+ nil,
+ @timestamp_2025_12_16_05_33_58,
+ ])
+ end
+
+ def test_write
+ assert_equal([
+ Time.at(@timestamp_2019_11_17_15_09_11),
+ nil,
+ Time.at(@timestamp_2025_12_16_05_33_58),
+ ],
+ @values)
+ end
+ end
+
+ sub_test_case("Timestamp(:millisecond)") do
+ def setup(&block)
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000
+ @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
+ super(&block)
+ end
+
+ def build_array
+ Arrow::TimestampArray.new(:milli,
+ [
+ @timestamp_2019_11_17_15_09_11,
+ nil,
+ @timestamp_2025_12_16_05_33_58,
+ ])
+ end
+
+ def test_write
+ assert_equal([
+ Time.at(@timestamp_2019_11_17_15_09_11 / 1_000),
+ nil,
+ Time.at(@timestamp_2025_12_16_05_33_58 / 1_000),
+ ],
+ @values)
+ end
+ end
+
+ sub_test_case("Timestamp(:microsecond)") do
+ def setup(&block)
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000
+ @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
+ super(&block)
+ end
+
+ def build_array
+ Arrow::TimestampArray.new(:micro,
+ [
+ @timestamp_2019_11_17_15_09_11,
+ nil,
+ @timestamp_2025_12_16_05_33_58,
+ ])
+ end
+
+ def test_write
+ assert_equal([
+ Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000),
+ nil,
+ Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000),
+ ],
+ @values)
+ end
+ end
+
+ sub_test_case("Timestamp(:nanosecond)") do
+ def setup(&block)
+ @timestamp_2019_11_17_15_09_11 = 1574003351 * 1_000_000_000
+ @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
+ super(&block)
+ end
+
+ def build_array
+ Arrow::TimestampArray.new(:nano,
+ [
+ @timestamp_2019_11_17_15_09_11,
+ nil,
+ @timestamp_2025_12_16_05_33_58,
+ ])
+ end
+
+ def test_write
+ assert_equal([
+ Time.at(@timestamp_2019_11_17_15_09_11 / 1_000_000_000),
+ nil,
+ Time.at(@timestamp_2025_12_16_05_33_58 / 1_000_000_000),
+ ],
+ @values)
+ end
+ end
+
+ sub_test_case("Timestamp(time_zone)") do
+ def setup(&block)
+ @time_zone = "UTC"
+ @timestamp_2019_11_17_15_09_11 = 1574003351
+ @timestamp_2025_12_16_05_33_58 = 1765863238
+ super(&block)
+ end
+
+ def build_array
+ data_type = Arrow::TimestampDataType.new(:second, @time_zone)
+ Arrow::TimestampArray.new(data_type,
+ [
+ @timestamp_2019_11_17_15_09_11,
+ nil,
+ @timestamp_2025_12_16_05_33_58,
+ ])
+ end
+
+ def test_type
+ assert_equal([Arrow::TimeUnit::SECOND, @time_zone],
+ [@type.unit, @type.time_zone&.identifier])
+ end
+ end
+
sub_test_case("Binary") do
def build_array
Arrow::BinaryArray.new(["Hello".b, nil, "World".b])
@@ -535,6 +664,7 @@ class TestFileWriter < Test::Unit::TestCase
end
data = File.open(path, "rb", &:read).freeze
table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow)
+ @type = table.value.data_type
@values = table.value.values
end
end
@@ -564,6 +694,7 @@ class TestStreamingWriter < Test::Unit::TestCase
end
data = File.open(path, "rb", &:read).freeze
table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrows)
+ @type = table.value.data_type
@values = table.value.values
end
end