This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 87dca7d832 GH-47016: [C++][FlightSQL] Fix negative timestamps to date
types (#47017)
87dca7d832 is described below
commit 87dca7d8320b549ad6ea17d84ef6c80360ef8c33
Author: Igor Antropov <[email protected]>
AuthorDate: Thu Jul 31 08:39:37 2025 +0100
GH-47016: [C++][FlightSQL] Fix negative timestamps to date types (#47017)
### Rationale for this change
Fix negative timestamps from arrow to date structs when using flightsql
odbc.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* GitHub Issue: #47016
Authored-by: Igor Antropov <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../flight_sql/accessors/date_array_accessor.cc | 2 +-
.../accessors/date_array_accessor_test.cc | 36 +++++++------
.../flight_sql/accessors/time_array_accessor.cc | 2 +-
.../accessors/time_array_accessor_test.cc | 8 +--
.../accessors/timestamp_array_accessor.cc | 52 +++++++++++--------
.../accessors/timestamp_array_accessor_test.cc | 60 ++++++++++++++--------
.../sql/odbc/odbcabstraction/calendar_utils.cc | 27 +++++++---
.../include/odbcabstraction/calendar_utils.h | 2 +-
8 files changed, 117 insertions(+), 72 deletions(-)
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc
index ac8d079cd0..ab139efc92 100644
--- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc
+++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc
@@ -68,7 +68,7 @@ RowStatus DateArrayFlightSqlAccessor<TARGET_TYPE,
ARROW_ARRAY>::MoveSingleCell_i
auto value = convertDate<ARROW_ARRAY>(this->GetArray()->Value(arrow_row));
tm date{};
- GetTimeForSecondsSinceEpoch(date, value);
+ GetTimeForSecondsSinceEpoch(value, date);
buffer[cell_counter].year = 1900 + (date.tm_year);
buffer[cell_counter].month = date.tm_mon + 1;
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc
index d0c4d65099..fdd48d2706 100644
---
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc
+++
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc
@@ -38,7 +38,11 @@ using arrow::ArrayFromVector;
using odbcabstraction::GetTimeForSecondsSinceEpoch;
TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
- std::vector<int32_t> values = {7589, 12320, 18980, 19095};
+ std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0};
+ std::vector<tagDATE_STRUCT> expected = {
+ {1990, 10, 12}, {2003, 9, 25}, {2021, 12, 19},
+ {2022, 4, 13}, {1969, 12, 31}, {1970, 1, 1},
+ };
std::shared_ptr<Array> array;
ArrayFromVector<Date32Type, int32_t>(values, &array);
@@ -60,19 +64,23 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
- tm date{};
- int64_t converted_time = values[i] * 86400;
- GetTimeForSecondsSinceEpoch(date, converted_time);
- ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
- ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
- ASSERT_EQ(date.tm_mday, buffer[i].day);
+ ASSERT_EQ(expected[i].year, buffer[i].year);
+ ASSERT_EQ(expected[i].month, buffer[i].month);
+ ASSERT_EQ(expected[i].day, buffer[i].day);
}
}
TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
- std::vector<int64_t> values = {86400000, 172800000, 259200000,
1649793238110,
- 345600000, 432000000, 518400000};
+ std::vector<int64_t> values = {
+ 86400000, 172800000, 259200000, 1649793238110, 0, 345600000,
432000000,
+ 518400000, -86400000, -17987443200000, -24268068949000};
+ std::vector<tagDATE_STRUCT> expected = {
+ /* year(16), month(u16), day(u16) */
+ {1970, 1, 2}, {1970, 1, 3}, {1970, 1, 4}, {2022, 4, 12},
+ {1970, 1, 1}, {1970, 1, 5}, {1970, 1, 6}, {1970, 1, 7},
+ {1969, 12, 31}, {1400, 1, 1}, {1200, 12, 22},
+ };
std::shared_ptr<Array> array;
ArrayFromVector<Date64Type, int64_t>(values, &array);
@@ -94,13 +102,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
- tm date{};
-
- int64_t converted_time = values[i] / 1000;
- GetTimeForSecondsSinceEpoch(date, converted_time);
- ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
- ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
- ASSERT_EQ(date.tm_mday, buffer[i].day);
+ ASSERT_EQ(expected[i].year, buffer[i].year);
+ ASSERT_EQ(expected[i].month, buffer[i].month);
+ ASSERT_EQ(expected[i].day, buffer[i].day);
}
}
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc
index 8f4b7db5ed..93bf11778b 100644
--- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc
+++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc
@@ -108,7 +108,7 @@ RowStatus TimeArrayFlightSqlAccessor<TARGET_TYPE,
ARROW_ARRAY, UNIT>::MoveSingle
auto converted_value_seconds =
ConvertTimeValue<ARROW_ARRAY>(this->GetArray()->Value(arrow_row), UNIT);
- GetTimeForSecondsSinceEpoch(time, converted_value_seconds);
+ GetTimeForSecondsSinceEpoch(converted_value_seconds, time);
buffer[cell_counter].hour = time.tm_hour;
buffer[cell_counter].minute = time.tm_min;
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc
index 26f1c1a676..d04a098e23 100644
---
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc
+++
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc
@@ -66,7 +66,7 @@ TEST(TEST_TIME32, TIME_WITH_SECONDS) {
tm time{};
- GetTimeForSecondsSinceEpoch(time, t32_values[i]);
+ GetTimeForSecondsSinceEpoch(t32_values[i], time);
ASSERT_EQ(buffer[i].hour, time.tm_hour);
ASSERT_EQ(buffer[i].minute, time.tm_min);
ASSERT_EQ(buffer[i].second, time.tm_sec);
@@ -103,7 +103,7 @@ TEST(TEST_TIME32, TIME_WITH_MILLI) {
tm time{};
auto convertedValue = t32_values[i] /
odbcabstraction::MILLI_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(time, convertedValue);
+ GetTimeForSecondsSinceEpoch(convertedValue, time);
ASSERT_EQ(buffer[i].hour, time.tm_hour);
ASSERT_EQ(buffer[i].minute, time.tm_min);
@@ -142,7 +142,7 @@ TEST(TEST_TIME64, TIME_WITH_MICRO) {
tm time{};
const auto convertedValue = t64_values[i] /
odbcabstraction::MICRO_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(time, convertedValue);
+ GetTimeForSecondsSinceEpoch(convertedValue, time);
ASSERT_EQ(buffer[i].hour, time.tm_hour);
ASSERT_EQ(buffer[i].minute, time.tm_min);
@@ -179,7 +179,7 @@ TEST(TEST_TIME64, TIME_WITH_NANO) {
tm time{};
const auto convertedValue = t64_values[i] /
odbcabstraction::NANO_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(time, convertedValue);
+ GetTimeForSecondsSinceEpoch(convertedValue, time);
ASSERT_EQ(buffer[i].hour, time.tm_hour);
ASSERT_EQ(buffer[i].minute, time.tm_min);
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc
index b85cb95a88..68e9f64fff 100644
---
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc
+++
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc
@@ -44,25 +44,26 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
return divisor;
}
-uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) {
- // Convert the given remainder and time unit to nanoseconds
- // since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
- switch (unit) {
- case TimeUnit::SECOND:
- return 0;
- case TimeUnit::MILLI:
- // 1000000 nanoseconds = 1 millisecond.
- return (units_since_epoch %
driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
- 1000000;
- case TimeUnit::MICRO:
- // 1000 nanoseconds = 1 microsecond.
- return (units_since_epoch %
driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) *
- 1000;
- case TimeUnit::NANO:
- // 1000 nanoseconds = 1 microsecond.
- return (units_since_epoch %
driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR);
+uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) {
+ /**
+ * Convert the given remainder and time unit to nanoseconds
+ * since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
+ */
+ if (unit == TimeUnit::SECOND) {
+ return 0;
}
- return 0;
+
+ const int64_t divisor = GetConversionToSecondsDivisor(unit);
+ const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO);
+
+ // Safe remainder calculation that always gives a non-negative result
+ int64_t remainder = units_since_epoch % divisor;
+ if (remainder < 0) {
+ remainder += divisor;
+ }
+
+ // Scale to nanoseconds
+ return static_cast<uint32_t>(remainder * (nano_divisor / divisor));
}
} // namespace
@@ -88,10 +89,21 @@ RowStatus TimestampArrayFlightSqlAccessor<TARGET_TYPE,
UNIT>::MoveSingleCell_imp
int64_t value = this->GetArray()->Value(arrow_row);
const auto divisor = GetConversionToSecondsDivisor(UNIT);
- const auto converted_result_seconds = value / divisor;
+ const auto converted_result_seconds =
+ // We want floor division here; C++ will round towards zero
+ (value < 0)
+ /**
+ * Floor division: Shift all "fractional" (not a multiple of
divisor) values so
+ * they round towards zero (and to the same value) along with the
"floor" less
+ * than them, then add 1 to get back to the floor. Alternative we
could shift
+ * negatively by (divisor - 1) but this breaks near INT64_MIN
causing underflow.
+ */
+ ? ((value + 1) / divisor) - 1
+ // Towards zero is already floor
+ : value / divisor;
tm timestamp = {0};
- GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds);
+ GetTimeForSecondsSinceEpoch(converted_result_seconds, timestamp);
buffer[cell_counter].year = 1900 + (timestamp.tm_year);
buffer[cell_counter].month = timestamp.tm_mon + 1;
diff --git
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc
index a5fb167e79..930cc6a565 100644
---
a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc
+++
b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc
@@ -34,8 +34,35 @@ using odbcabstraction::TIMESTAMP_STRUCT;
using odbcabstraction::GetTimeForSecondsSinceEpoch;
TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
- std::vector<int64_t> values = {86400370, 172800000, 259200000,
1649793238110LL,
- 345600000, 432000000, 518400000};
+ std::vector<int64_t> values = {86400370, 172800000, 259200000,
+ 1649793238110LL, 345600000, 432000000,
+ 518400000, -86399000, 0,
+ -86399999, -86399001, 86400001,
+ 86400999, -3786912000000,
-5364662400000,
+ -1500, -24268068949000};
+ std::vector<TIMESTAMP_STRUCT> expected = {
+ /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16),
+ fraction(u32) */
+ {1970, 1, 2, 0, 0, 0, 370000000},
+ {1970, 1, 3, 0, 0, 0, 0},
+ {1970, 1, 4, 0, 0, 0, 0},
+ {2022, 4, 12, 19, 53, 58, 110000000},
+ {1970, 1, 5, 0, 0, 0, 0},
+ {1970, 1, 6, 0, 0, 0, 0},
+ {1970, 1, 7, 0, 0, 0, 0},
+ {1969, 12, 31, 0, 0, 1, 0},
+ {1970, 1, 1, 0, 0, 0, 0},
+ /* Tests both ends of the fraction rounding range to ensure we don't tip
the wrong
+ way */
+ {1969, 12, 31, 0, 0, 0, 1000000},
+ {1969, 12, 31, 0, 0, 0, 999000000},
+ {1970, 1, 2, 0, 0, 0, 1000000},
+ {1970, 1, 2, 0, 0, 0, 999000000},
+ {1849, 12, 31, 0, 0, 0, 0U},
+ {1800, 1, 1, 0, 0, 0, 0U},
+ {1969, 12, 31, 23, 59, 58, 500000000U},
+ {1200, 12, 22, 13, 44, 11, 0U},
+ };
std::shared_ptr<Array> timestamp_array;
@@ -60,22 +87,13 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
- tm date{};
-
- auto converted_time = values[i] /
odbcabstraction::MILLI_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(date, converted_time);
-
- ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
- ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
- ASSERT_EQ(buffer[i].day, date.tm_mday);
- ASSERT_EQ(buffer[i].hour, date.tm_hour);
- ASSERT_EQ(buffer[i].minute, date.tm_min);
- ASSERT_EQ(buffer[i].second, date.tm_sec);
-
- constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000;
- ASSERT_EQ(
- buffer[i].fraction,
- (values[i] % odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
NANOSECONDS_PER_MILLI);
+ ASSERT_EQ(buffer[i].year, expected[i].year);
+ ASSERT_EQ(buffer[i].month, expected[i].month);
+ ASSERT_EQ(buffer[i].day, expected[i].day);
+ ASSERT_EQ(buffer[i].hour, expected[i].hour);
+ ASSERT_EQ(buffer[i].minute, expected[i].minute);
+ ASSERT_EQ(buffer[i].second, expected[i].second);
+ ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}
@@ -109,7 +127,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
tm date{};
auto converted_time = values[i];
- GetTimeForSecondsSinceEpoch(date, converted_time);
+ GetTimeForSecondsSinceEpoch(converted_time, date);
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
@@ -151,7 +169,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
tm date{};
auto converted_time = values[i] /
odbcabstraction::MICRO_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(date, converted_time);
+ GetTimeForSecondsSinceEpoch(converted_time, date);
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
@@ -194,7 +212,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
tm date{};
auto converted_time = values[i] / odbcabstraction::NANO_TO_SECONDS_DIVISOR;
- GetTimeForSecondsSinceEpoch(date, converted_time);
+ GetTimeForSecondsSinceEpoch(converted_time, date);
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
diff --git a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc
b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc
index f4a23419f1..7b92a00a25 100644
--- a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc
+++ b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc
@@ -17,7 +17,9 @@
#include "odbcabstraction/calendar_utils.h"
+#include <chrono>
#include <cstdint>
+#include <cstring>
#include <ctime>
namespace driver {
@@ -26,7 +28,7 @@ int64_t GetTodayTimeFromEpoch() {
tm date{};
int64_t t = std::time(0);
- GetTimeForSecondsSinceEpoch(date, t);
+ GetTimeForSecondsSinceEpoch(t, date);
date.tm_hour = 0;
date.tm_min = 0;
@@ -39,13 +41,22 @@ int64_t GetTodayTimeFromEpoch() {
#endif
}
-void GetTimeForSecondsSinceEpoch(tm& date, int64_t value) {
-#if defined(_WIN32)
- gmtime_s(&date, &value);
-#else
- time_t time_value = static_cast<time_t>(value);
- gmtime_r(&time_value, &date);
-#endif
+void GetTimeForSecondsSinceEpoch(const int64_t seconds_since_epoch, std::tm&
out_tm) {
+ std::memset(&out_tm, 0, sizeof(std::tm));
+
+ std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>
timepoint{
+ std::chrono::seconds{seconds_since_epoch}};
+ auto tm_days = std::chrono::floor<std::chrono::days>(timepoint);
+
+ std::chrono::year_month_day ymd(tm_days);
+ std::chrono::hh_mm_ss<std::chrono::seconds> timeofday(timepoint - tm_days);
+
+ out_tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+ out_tm.tm_mon = static_cast<unsigned>(ymd.month()) - 1;
+ out_tm.tm_mday = static_cast<unsigned>(ymd.day());
+ out_tm.tm_hour = static_cast<int>(timeofday.hours().count());
+ out_tm.tm_min = static_cast<int>(timeofday.minutes().count());
+ out_tm.tm_sec = static_cast<int>(timeofday.seconds().count());
}
} // namespace odbcabstraction
} // namespace driver
diff --git
a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h
b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h
index 1f4c6e7912..6b55c443cd 100644
---
a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h
+++
b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h
@@ -24,6 +24,6 @@ namespace driver {
namespace odbcabstraction {
int64_t GetTodayTimeFromEpoch();
-void GetTimeForSecondsSinceEpoch(tm& date, int64_t value);
+void GetTimeForSecondsSinceEpoch(const int64_t seconds_since_epoch, std::tm&
out_tm);
} // namespace odbcabstraction
} // namespace driver