This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 15db256bcd Fix: `date_part` to extract only the requested part (not
the overall interval) (#7189)
15db256bcd is described below
commit 15db256bcdd739ce3233d90b3242af15ae3945fa
Author: delamarch3 <[email protected]>
AuthorDate: Tue Mar 25 18:07:08 2025 +0000
Fix: `date_part` to extract only the requested part (not the overall
interval) (#7189)
* extract parts out of nanoseconds for date_part (IntervalMonthDayNano)
* extract parts out of days and months (IntervalMonthDayNano)
* extract parts out of IntervalDayTime
* extract parts out of DurationSecond
* extract parts out of DurationMillisecond
* extract parts out of DurationMicrosecond
* extract parts out of DurationNanosecond
* ensure date_part interval results are consistent with duckdb/postgres
* revert datepart changes for duration types
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-arith/src/temporal.rs | 141 ++++++++++++++++++++++++++++----------------
1 file changed, 91 insertions(+), 50 deletions(-)
diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs
index a3fe7e8ad4..0b2b98b67b 100644
--- a/arrow-arith/src/temporal.rs
+++ b/arrow-arith/src/temporal.rs
@@ -464,11 +464,15 @@ impl ExtractDatePartExt for
PrimitiveArray<IntervalDayTimeType> {
DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))),
DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60
* 60 * 1_000)))),
- DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds /
(60 * 1_000)))),
- DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds /
1_000))),
- DatePart::Millisecond => Ok(self.unary_opt(|d|
Some(d.milliseconds))),
- DatePart::Microsecond => Ok(self.unary_opt(|d|
d.milliseconds.checked_mul(1_000))),
- DatePart::Nanosecond => Ok(self.unary_opt(|d|
d.milliseconds.checked_mul(1_000_000))),
+ DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds /
(60 * 1_000) % 60))),
+ DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds /
1_000 % 60))),
+ DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds
% (60 * 1_000)))),
+ DatePart::Microsecond => {
+ Ok(self.unary_opt(|d| (d.milliseconds % (60 *
1_000)).checked_mul(1_000)))
+ }
+ DatePart::Nanosecond => {
+ Ok(self.unary_opt(|d| (d.milliseconds % (60 *
1_000)).checked_mul(1_000_000)))
+ }
DatePart::Quarter
| DatePart::Year
@@ -488,25 +492,31 @@ impl ExtractDatePartExt for
PrimitiveArray<IntervalMonthDayNanoType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
match part {
DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano|
Some(d.months / 12))),
- DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano|
Some(d.months))),
+ DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano|
Some(d.months % 12))),
DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano|
Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano|
Some(d.days))),
DatePart::Hour => {
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 *
1_000_000_000)).try_into().ok()))
}
DatePart::Minute => {
- Ok(self.unary_opt(|d| (d.nanoseconds / (60 *
1_000_000_000)).try_into().ok()))
+ Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000) %
60).try_into().ok()))
}
DatePart::Second => {
- Ok(self.unary_opt(|d| (d.nanoseconds /
1_000_000_000).try_into().ok()))
+ Ok(self.unary_opt(|d| ((d.nanoseconds / 1_000_000_000) %
60).try_into().ok()))
}
- DatePart::Millisecond => {
- Ok(self.unary_opt(|d| (d.nanoseconds /
1_000_000).try_into().ok()))
- }
- DatePart::Microsecond => {
- Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok()))
+ DatePart::Millisecond => Ok(self.unary_opt(|d| {
+ (d.nanoseconds % (60 * 1_000_000_000) / 1_000_000)
+ .try_into()
+ .ok()
+ })),
+ DatePart::Microsecond => Ok(self.unary_opt(|d| {
+ (d.nanoseconds % (60 * 1_000_000_000) / 1_000)
+ .try_into()
+ .ok()
+ })),
+ DatePart::Nanosecond => {
+ Ok(self.unary_opt(|d| (d.nanoseconds % (60 *
1_000_000_000)).try_into().ok()))
}
- DatePart::Nanosecond => Ok(self.unary_opt(|d|
d.nanoseconds.try_into().ok())),
DatePart::Quarter
| DatePart::WeekISO
@@ -1764,9 +1774,14 @@ mod tests {
fn test_interval_day_time_array() {
let input: IntervalDayTimeArray = vec![
IntervalDayTime::ZERO,
- IntervalDayTime::new(10, 42),
- IntervalDayTime::new(10, 1042),
- IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1),
+ IntervalDayTime::new(10, 42), // 10d, 42ms
+ IntervalDayTime::new(10, 1042), // 10d, 1s, 42ms
+ IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), // 10d,
24h, 1ms
+ IntervalDayTime::new(
+ 6,
+ (MILLISECONDS * 60 * 60 * 4 + MILLISECONDS * 60 * 22 +
MILLISECONDS * 11 + 3)
+ as i32,
+ ), // 6d, 4h, 22m, 11s, 3ms
]
.into();
@@ -1777,6 +1792,7 @@ mod tests {
assert_eq!(10, actual.value(1));
assert_eq!(10, actual.value(2));
assert_eq!(10, actual.value(3));
+ assert_eq!(6, actual.value(4));
let actual = date_part(&input, DatePart::Week).unwrap();
let actual = actual.as_primitive::<Int32Type>();
@@ -1784,51 +1800,57 @@ mod tests {
assert_eq!(1, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(1, actual.value(3));
+ assert_eq!(0, actual.value(4));
// Days doesn't affect time.
- let actual = date_part(&input, DatePart::Nanosecond).unwrap();
+ let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
- assert_eq!(42_000_000, actual.value(1));
- assert_eq!(1_042_000_000, actual.value(2));
- // Overflow returns zero.
+ assert_eq!(0, actual.value(1));
+ assert_eq!(0, actual.value(2));
+ assert_eq!(24, actual.value(3));
+ assert_eq!(4, actual.value(4));
+
+ let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(0, actual.value(0));
+ assert_eq!(0, actual.value(1));
+ assert_eq!(0, actual.value(2));
assert_eq!(0, actual.value(3));
+ assert_eq!(22, actual.value(4));
- let actual = date_part(&input, DatePart::Microsecond).unwrap();
+ let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
- assert_eq!(42_000, actual.value(1));
- assert_eq!(1_042_000, actual.value(2));
- // Overflow returns zero.
+ assert_eq!(0, actual.value(1));
+ assert_eq!(1, actual.value(2));
assert_eq!(0, actual.value(3));
+ assert_eq!(11, actual.value(4));
let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
assert_eq!(1042, actual.value(2));
- assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3));
-
- let actual = date_part(&input, DatePart::Second).unwrap();
- let actual = actual.as_primitive::<Int32Type>();
- assert_eq!(0, actual.value(0));
- assert_eq!(0, actual.value(1));
- assert_eq!(1, actual.value(2));
- assert_eq!(24 * 60 * 60, actual.value(3));
+ assert_eq!(1, actual.value(3));
+ assert_eq!(11003, actual.value(4));
- let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
- assert_eq!(0, actual.value(1));
- assert_eq!(0, actual.value(2));
- assert_eq!(24 * 60, actual.value(3));
+ assert_eq!(42_000, actual.value(1));
+ assert_eq!(1_042_000, actual.value(2));
+ assert_eq!(1_000, actual.value(3));
+ assert_eq!(11_003_000, actual.value(4));
- let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
- assert_eq!(0, actual.value(1));
- assert_eq!(0, actual.value(2));
- assert_eq!(24, actual.value(3));
+ assert_eq!(42_000_000, actual.value(1));
+ assert_eq!(1_042_000_000, actual.value(2));
+ assert_eq!(1_000_000, actual.value(3));
+ // Overflow returns zero.
+ assert_eq!(0, actual.value(4));
// Month and year are not valid (since days in month varies).
assert!(date_part(&input, DatePart::Month).is_err());
@@ -1841,8 +1863,18 @@ mod tests {
fn test_interval_month_day_nano_array() {
let input: IntervalMonthDayNanoArray = vec![
IntervalMonthDayNano::ZERO,
- IntervalMonthDayNano::new(5, 10, 42),
- IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000
+ 1),
+ IntervalMonthDayNano::new(5, 10, 42), // 5m, 1w, 3d, 42ns
+ IntervalMonthDayNano::new(16, 35, NANOSECONDS_IN_DAY + 1), // 1y,
4m, 5w, 24h, 1ns
+ IntervalMonthDayNano::new(
+ 0,
+ 0,
+ NANOSECONDS * 60 * 60 * 4
+ + NANOSECONDS * 60 * 22
+ + NANOSECONDS * 11
+ + 1_000_000 * 33
+ + 1_000 * 44
+ + 5,
+ ), // 4hr, 22m, 11s, 33ms, 44us, 5ns
]
.into();
@@ -1852,12 +1884,14 @@ mod tests {
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));
+ assert_eq!(0, actual.value(3));
let actual = date_part(&input, DatePart::Month).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(5, actual.value(1));
- assert_eq!(16, actual.value(2));
+ assert_eq!(4, actual.value(2));
+ assert_eq!(0, actual.value(3));
// Week and day follow from day, but are not affected by months or
nanos.
let actual = date_part(&input, DatePart::Week).unwrap();
@@ -1865,12 +1899,14 @@ mod tests {
assert_eq!(0, actual.value(0));
assert_eq!(1, actual.value(1));
assert_eq!(5, actual.value(2));
+ assert_eq!(0, actual.value(3));
let actual = date_part(&input, DatePart::Day).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(10, actual.value(1));
assert_eq!(35, actual.value(2));
+ assert_eq!(0, actual.value(3));
// Times follow from nanos, but are not affected by months or days.
let actual = date_part(&input, DatePart::Hour).unwrap();
@@ -1878,38 +1914,43 @@ mod tests {
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24, actual.value(2));
+ assert_eq!(4, actual.value(3));
let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
- assert_eq!(24 * 60, actual.value(2));
+ assert_eq!(0, actual.value(2));
+ assert_eq!(22, actual.value(3));
let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
- assert_eq!(24 * 60 * 60, actual.value(2));
+ assert_eq!(0, actual.value(2));
+ assert_eq!(11, actual.value(3));
let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
- assert_eq!(24 * 60 * 60 * 1_000, actual.value(2));
+ assert_eq!(0, actual.value(2));
+ assert_eq!(11_033, actual.value(3));
let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
- // Overflow gives zero.
assert_eq!(0, actual.value(2));
+ assert_eq!(11_033_044, actual.value(3));
let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
- // Overflow gives zero.
- assert_eq!(0, actual.value(2));
+ assert_eq!(1, actual.value(2));
+ // Overflow returns zero.
+ assert_eq!(0, actual.value(3));
}
#[test]