Jefffrey commented on code in PR #22610:
URL: https://github.com/apache/datafusion/pull/22610#discussion_r3377617476
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -597,91 +564,51 @@ fn date_bin_impl(
return exec_err!("DATE_BIN stride must be non-zero");
}
- fn timestamp_scale<T: ArrowTimestampType>() -> i64 {
- match T::UNIT {
- Nanosecond => 1,
- Microsecond => NANOS_PER_MICRO,
- Millisecond => NANOS_PER_MILLI,
- Second => NANOSECONDS,
- }
- }
-
- fn timestamp_scale_overflow_error(x: i64) -> DataFusionError {
- DataFusionError::Execution(format!(
- "DATE_BIN source timestamp {x} cannot be represented in
nanoseconds"
- ))
+ fn transform_scalar_with_stride<T: ArrowTimestampType>(
+ value: Option<i64>,
+ origin: i64,
+ stride: i64,
+ stride_fn: BinFunction,
+ ) -> Option<i64> {
+ let scale = timestamp_scale(T::UNIT);
+ value.and_then(|val| {
+ checked_scale_to_nanos(val, scale)
+ .and_then(|scaled| stride_fn(stride, scaled, origin))
+ .map(|binned| binned / scale)
+ .ok()
Review Comment:
```suggestion
value
.and_then(|val| val.checked_mul(scale))
.and_then(|scaled| stride_fn(stride, scaled, origin).ok())
.map(|binned| binned / scale)
```
more clear that we dont care about the errors returned by
`checked_scale_to_nanos` so no need to use it
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -420,14 +418,46 @@ fn date_bin_months_interval(stride_months: i64, source:
i64, origin: i64) -> Res
}
fn to_utc_date_time(nanos: i64) -> Result<DateTime<Utc>> {
- let secs = nanos / NANOS_PER_SEC;
- let nsec = (nanos % NANOS_PER_SEC) as u32;
+ // DateTime::from_timestamp requires a non-negative nanosecond part.
+ let secs = nanos.div_euclid(NANOS_PER_SEC);
+ let nsec = nanos.rem_euclid(NANOS_PER_SEC) as u32;
Review Comment:
whats the significance of changing to `div_euclid`/`rem_euclid` here?
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -420,14 +418,46 @@ fn date_bin_months_interval(stride_months: i64, source:
i64, origin: i64) -> Res
}
fn to_utc_date_time(nanos: i64) -> Result<DateTime<Utc>> {
- let secs = nanos / NANOS_PER_SEC;
- let nsec = (nanos % NANOS_PER_SEC) as u32;
+ // DateTime::from_timestamp requires a non-negative nanosecond part.
+ let secs = nanos.div_euclid(NANOS_PER_SEC);
+ let nsec = nanos.rem_euclid(NANOS_PER_SEC) as u32;
match DateTime::from_timestamp(secs, nsec) {
Some(dt) => Ok(dt),
None => exec_err!("Invalid timestamp value"),
}
}
+fn timestamp_scale(unit: TimeUnit) -> i64 {
Review Comment:
we could probably keep the original version being generic over
`ArrowTimestampType`
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -751,24 +673,15 @@ fn date_bin_impl(
T: ArrowTimestampType,
{
let array = as_primitive_array::<T>(array)?;
- let scale = timestamp_scale::<T>();
-
- let values = array
- .iter()
- .map(|val| match val {
- Some(val) => {
- let scaled = val
- .checked_mul(scale)
- .ok_or_else(||
timestamp_scale_overflow_error(val))?;
- Ok(stride_fn(stride, scaled, origin)
- .ok()
- .map(|binned| binned / scale))
- }
- None => Ok(None),
- })
- .collect::<Result<Vec<_>>>()?;
-
- let result = PrimitiveArray::<T>::from_iter(values);
+ let scale = timestamp_scale(T::UNIT);
+
+ // Per-row errors become NULL, matching scalar behavior.
+ let result: PrimitiveArray<T> = array.unary_opt(|val| {
+ checked_scale_to_nanos(val, scale)
Review Comment:
similarly here we dont really need `checked_scale_to_nanos` if we're going
to swallow the error anyway
##########
datafusion/functions/src/datetime/date_bin.rs:
##########
@@ -690,13 +617,12 @@ fn date_bin_impl(
return exec_err!("DATE_BIN with Time32 source requires Time32
origin");
}
let result = v.and_then(|x| {
Review Comment:
similarly here
```rust
let result = v
.and_then(|x| (x as i64).checked_mul(NANOS_PER_MILLI))
.and_then(|scaled| stride_fn(stride, scaled, origin).ok())
.map(|binned| ((binned % NANOSECONDS_IN_DAY) /
NANOS_PER_MILLI) as i32);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]