liurenjie1024 commented on code in PR #42: URL: https://github.com/apache/iceberg-rust/pull/42#discussion_r1308133680
########## crates/iceberg/src/transform/temporal.rs: ########## @@ -0,0 +1,669 @@ +use super::TransformFunction; +use crate::{Error, Result}; +use arrow::array::{ + Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::binary; +use arrow::datatypes; +use arrow::datatypes::DataType; +use arrow::{ + array::{ArrayRef, Date32Array, Int32Array}, + compute::{month_dyn, year_dyn}, +}; +use chrono::Datelike; +use std::sync::Arc; + +/// 719163 is the number of days from 0000-01-01 to 1970-01-01 +const EPOCH_DAY_FROM_CE: i32 = 719163; +const DAY_PER_SECOND: f64 = 0.0000115741; +const HOUR_PER_SECOND: f64 = 1_f64 / 3600.0; + +/// Extract a date or timestamp year, as years from 1970 +pub struct Year; + +impl TransformFunction for Year { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let array = year_dyn(&input).map_err(|err| { + Error::new( + crate::ErrorKind::ArrowCompute, + format!("error in transformfunction: {}", err), + ) + })?; + Ok(Arc::<Int32Array>::new( + array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| v - 1970), Review Comment: Please replace `1970` with some constants. 
########## crates/iceberg/src/transform/temporal.rs: ########## @@ -0,0 +1,669 @@ +use super::TransformFunction; +use crate::{Error, Result}; +use arrow::array::{ + Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::binary; +use arrow::datatypes; +use arrow::datatypes::DataType; +use arrow::{ + array::{ArrayRef, Date32Array, Int32Array}, + compute::{month_dyn, year_dyn}, +}; +use chrono::Datelike; +use std::sync::Arc; + +/// 719163 is the number of days from 0000-01-01 to 1970-01-01 +const EPOCH_DAY_FROM_CE: i32 = 719163; +const DAY_PER_SECOND: f64 = 0.0000115741; +const HOUR_PER_SECOND: f64 = 1_f64 / 3600.0; + +/// Extract a date or timestamp year, as years from 1970 +pub struct Year; + +impl TransformFunction for Year { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let array = year_dyn(&input).map_err(|err| { + Error::new( + crate::ErrorKind::ArrowCompute, + format!("error in transformfunction: {}", err), + ) + })?; + Ok(Arc::<Int32Array>::new( + array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| v - 1970), + )) + } +} + +/// Extract a date or timestamp month, as months from 1970-01-01 +pub struct Month; + +impl TransformFunction for Month { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let year_array = year_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + let year_array: Int32Array = year_array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| 12 * (v - 1970)); Review Comment: As above. 
########## crates/iceberg/src/transform/temporal.rs: ########## @@ -0,0 +1,669 @@ +use super::TransformFunction; +use crate::{Error, Result}; +use arrow::array::{ + Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::binary; +use arrow::datatypes; +use arrow::datatypes::DataType; +use arrow::{ + array::{ArrayRef, Date32Array, Int32Array}, + compute::{month_dyn, year_dyn}, +}; +use chrono::Datelike; +use std::sync::Arc; + +/// 719163 is the number of days from 0000-01-01 to 1970-01-01 +const EPOCH_DAY_FROM_CE: i32 = 719163; +const DAY_PER_SECOND: f64 = 0.0000115741; +const HOUR_PER_SECOND: f64 = 1_f64 / 3600.0; + +/// Extract a date or timestamp year, as years from 1970 +pub struct Year; + +impl TransformFunction for Year { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let array = year_dyn(&input).map_err(|err| { + Error::new( + crate::ErrorKind::ArrowCompute, + format!("error in transformfunction: {}", err), + ) + })?; + Ok(Arc::<Int32Array>::new( + array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| v - 1970), + )) + } +} + +/// Extract a date or timestamp month, as months from 1970-01-01 +pub struct Month; + +impl TransformFunction for Month { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let year_array = year_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + let year_array: Int32Array = year_array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| 12 * (v - 1970)); + let month_array = month_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + Ok(Arc::<Int32Array>::new( + binary( + month_array.as_any().downcast_ref::<Int32Array>().unwrap(), + year_array.as_any().downcast_ref::<Int32Array>().unwrap(), + // Compute month from 1970-01-01, so minus 1 here. 
+ |a, b| a + b - 1, + ) + .unwrap(), + )) + } +} + +/// Extract a date or timestamp day, as days from 1970-01-01 +pub struct Day; + +impl TransformFunction for Day { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let res: Int32Array = match input.data_type() { + DataType::Timestamp(unit, _) => match unit { + datatypes::TimeUnit::Second => input + .as_any() + .downcast_ref::<TimestampSecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Millisecond => input + .as_any() + .downcast_ref::<TimestampMillisecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 / 1000.0 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Microsecond => input + .as_any() + .downcast_ref::<TimestampMicrosecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Nanosecond => input + .as_any() + .downcast_ref::<TimestampNanosecondArray>() + .unwrap() + .unary(|v| -> i32 { + (v as f64 / 1000.0 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 + }), + }, + DataType::Date32 => { + input + .as_any() + .downcast_ref::<Date32Array>() + .unwrap() + .unary(|v| -> i32 { + datatypes::Date32Type::to_naive_date(v).num_days_from_ce() + - EPOCH_DAY_FROM_CE + }) + } + DataType::Date64 => { + input + .as_any() + .downcast_ref::<Date64Array>() + .unwrap() + .unary(|v| -> i32 { + datatypes::Date64Type::to_naive_date(v).num_days_from_ce() + - EPOCH_DAY_FROM_CE + }) + } + _ => unreachable!( + "Should not call transform in Day with type {:?}", + input.data_type() + ), + }; + Ok(Arc::new(res)) Review Comment: Missing timestamp with timezone? 
########## crates/iceberg/src/transform/temporal.rs: ########## @@ -0,0 +1,669 @@ +use super::TransformFunction; +use crate::{Error, Result}; +use arrow::array::{ + Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::binary; +use arrow::datatypes; +use arrow::datatypes::DataType; +use arrow::{ + array::{ArrayRef, Date32Array, Int32Array}, + compute::{month_dyn, year_dyn}, +}; +use chrono::Datelike; +use std::sync::Arc; + +/// 719163 is the number of days from 0000-01-01 to 1970-01-01 +const EPOCH_DAY_FROM_CE: i32 = 719163; +const DAY_PER_SECOND: f64 = 0.0000115741; +const HOUR_PER_SECOND: f64 = 1_f64 / 3600.0; + +/// Extract a date or timestamp year, as years from 1970 +pub struct Year; + +impl TransformFunction for Year { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let array = year_dyn(&input).map_err(|err| { + Error::new( + crate::ErrorKind::ArrowCompute, + format!("error in transformfunction: {}", err), + ) + })?; + Ok(Arc::<Int32Array>::new( + array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| v - 1970), + )) + } +} + +/// Extract a date or timestamp month, as months from 1970-01-01 +pub struct Month; + +impl TransformFunction for Month { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let year_array = year_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + let year_array: Int32Array = year_array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| 12 * (v - 1970)); + let month_array = month_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + Ok(Arc::<Int32Array>::new( + binary( + month_array.as_any().downcast_ref::<Int32Array>().unwrap(), + year_array.as_any().downcast_ref::<Int32Array>().unwrap(), + // Compute month from 1970-01-01, so minus 1 here. 
+ |a, b| a + b - 1, + ) + .unwrap(), + )) + } +} + +/// Extract a date or timestamp day, as days from 1970-01-01 +pub struct Day; + +impl TransformFunction for Day { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let res: Int32Array = match input.data_type() { + DataType::Timestamp(unit, _) => match unit { + datatypes::TimeUnit::Second => input + .as_any() + .downcast_ref::<TimestampSecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Millisecond => input Review Comment: Currently Iceberg only supports microsecond timestamps, so we should remove the unsupported data types. ########## crates/iceberg/src/transform/temporal.rs: ########## @@ -0,0 +1,669 @@ +use super::TransformFunction; +use crate::{Error, Result}; +use arrow::array::{ + Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::binary; +use arrow::datatypes; +use arrow::datatypes::DataType; +use arrow::{ + array::{ArrayRef, Date32Array, Int32Array}, + compute::{month_dyn, year_dyn}, +}; +use chrono::Datelike; +use std::sync::Arc; + +/// 719163 is the number of days from 0000-01-01 to 1970-01-01 +const EPOCH_DAY_FROM_CE: i32 = 719163; +const DAY_PER_SECOND: f64 = 0.0000115741; +const HOUR_PER_SECOND: f64 = 1_f64 / 3600.0; + +/// Extract a date or timestamp year, as years from 1970 +pub struct Year; + +impl TransformFunction for Year { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let array = year_dyn(&input).map_err(|err| { + Error::new( + crate::ErrorKind::ArrowCompute, + format!("error in transformfunction: {}", err), + ) + })?; + Ok(Arc::<Int32Array>::new( + array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| v - 1970), + )) + } +} + +/// Extract a date or timestamp month, as months from 1970-01-01 +pub struct Month; + +impl TransformFunction for Month { + fn transform(&self, input: ArrayRef) -> 
Result<ArrayRef> { + let year_array = year_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + let year_array: Int32Array = year_array + .as_any() + .downcast_ref::<Int32Array>() + .unwrap() + .unary(|v| 12 * (v - 1970)); + let month_array = month_dyn(&input) + .map_err(|err| Error::new(crate::ErrorKind::ArrowCompute, format!("{err}")))?; + Ok(Arc::<Int32Array>::new( + binary( + month_array.as_any().downcast_ref::<Int32Array>().unwrap(), + year_array.as_any().downcast_ref::<Int32Array>().unwrap(), + // Compute month from 1970-01-01, so minus 1 here. + |a, b| a + b - 1, + ) + .unwrap(), + )) + } +} + +/// Extract a date or timestamp day, as days from 1970-01-01 +pub struct Day; + +impl TransformFunction for Day { + fn transform(&self, input: ArrayRef) -> Result<ArrayRef> { + let res: Int32Array = match input.data_type() { + DataType::Timestamp(unit, _) => match unit { + datatypes::TimeUnit::Second => input + .as_any() + .downcast_ref::<TimestampSecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Millisecond => input + .as_any() + .downcast_ref::<TimestampMillisecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 / 1000.0 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Microsecond => input + .as_any() + .downcast_ref::<TimestampMicrosecondArray>() + .unwrap() + .unary(|v| -> i32 { (v as f64 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 }), + datatypes::TimeUnit::Nanosecond => input + .as_any() + .downcast_ref::<TimestampNanosecondArray>() + .unwrap() + .unary(|v| -> i32 { + (v as f64 / 1000.0 / 1000.0 / 1000.0 * DAY_PER_SECOND) as i32 + }), + }, + DataType::Date32 => { + input + .as_any() + .downcast_ref::<Date32Array>() + .unwrap() + .unary(|v| -> i32 { + datatypes::Date32Type::to_naive_date(v).num_days_from_ce() + - EPOCH_DAY_FROM_CE + }) + } + DataType::Date64 => { Review Comment: Currently only 32-bit dates are supported; remove this? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org