liurenjie1024 commented on code in PR #287: URL: https://github.com/apache/iceberg-rust/pull/287#discussion_r1533295727
########## crates/iceberg/src/transform/mod.rs: ########## @@ -31,6 +34,8 @@ pub trait TransformFunction: Send { /// The implementation of this function will need to check and downcast the input to specific /// type. fn transform(&self, input: ArrayRef) -> Result<ArrayRef>; + /// transform_literal will take an input literal and transform it into a new literal. + fn transform_literal(&self, input: &Literal) -> Result<Option<Literal>>; Review Comment: I agree with @marvinlanhenke that we should accept `Datum` and return `Datum`. In theory `Literal` should be physical value only, while `Datum` is type + value. I'm thinking reducing literal enums in future. ########## crates/iceberg/src/transform/temporal.rs: ########## @@ -51,12 +61,48 @@ impl TransformFunction for Year { .unary(|v| v - UNIX_EPOCH_YEAR), )) } + + fn transform_literal( + &self, + input: &crate::spec::Literal, + ) -> Result<Option<crate::spec::Literal>> { + match input { + Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR), + ))), + Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), + ))), + Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), + ))), + _ => unreachable!("Should not call internally for unsupported literal type"), Review Comment: We should not panic here, we should return error. ########## crates/iceberg/src/transform/truncate.rs: ########## @@ -99,13 +118,44 @@ impl TransformFunction for Truncate { .downcast_ref::<arrow_array::LargeStringArray>() .unwrap() .iter() - .map(|v| v.map(|v| Self::truncate_str_by_char(v, len))), + .map(|v| v.map(|v| Self::truncate_str(v, len))), ); Ok(Arc::new(res)) } _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"), } } + + fn transform_literal(&self, input: &Literal) -> crate::Result<Option<Literal>> { + match input { + Literal::Primitive(PrimitiveLiteral::Int(v)) => Ok(Some({ + let width: i32 = self.width.try_into().map_err(|_| { + Error::new( + crate::ErrorKind::DataInvalid, + "width is failed to convert to i32 when truncate Int32Array", + ) + })?; + Literal::Primitive(PrimitiveLiteral::Int(Self::truncate_i32(*v, width))) + })), + Literal::Primitive(PrimitiveLiteral::Long(v)) => Ok(Some({ + let width = self.width as i64; + Literal::Primitive(PrimitiveLiteral::Long(Self::truncate_i64(*v, width))) + })), + Literal::Primitive(PrimitiveLiteral::Decimal(v)) => Ok(Some({ + let width = self.width as i128; + Literal::Primitive(PrimitiveLiteral::Decimal(Self::truncate_decimal_i128( + *v, width, + ))) + })), + Literal::Primitive(PrimitiveLiteral::String(v)) => Ok(Some({ + let len = self.width as usize; + Literal::Primitive(PrimitiveLiteral::String( + Self::truncate_str(v, len).to_string(), + )) + })), + _ => unreachable!("Truncate transform only supports (int,long,decimal,string) types"), Review Comment: Ditto ########## crates/iceberg/src/transform/temporal.rs: ########## @@ -51,12 +61,48 @@ impl TransformFunction for Year { .unary(|v| v - UNIX_EPOCH_YEAR), )) } + + fn transform_literal( + &self, + input: &crate::spec::Literal, + ) -> Result<Option<crate::spec::Literal>> { + match input { + Literal::Primitive(PrimitiveLiteral::Date(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Date32Type::to_naive_date(*v).year() - UNIX_EPOCH_YEAR), + ))), + Literal::Primitive(PrimitiveLiteral::Timestamp(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), + ))), + Literal::Primitive(PrimitiveLiteral::TimestampTZ(v)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Int(Self::timestamp_to_year(*v)), + ))), + _ => unreachable!("Should not call internally for unsupported literal type"), + } + } } /// Extract a date or timestamp month, as months from 1970-01-01 #[derive(Debug)] pub struct Month; +impl Month { + #[inline] + fn timestamp_to_month(timestamp: i64) -> i32 { Review Comment: This is incorrect, we should the month is accumulated month, see https://github.com/apache/iceberg/blob/9d42eb44a32ff4a1f86e8427fe023bde1e9652f7/api/src/test/java/org/apache/iceberg/transforms/TestDates.java#L56 ########## crates/iceberg/src/transform/temporal.rs: ########## @@ -39,6 +38,17 @@ const UNIX_EPOCH_YEAR: i32 = 1970; #[derive(Debug)] pub struct Year; +impl Year { + #[inline] + fn timestamp_to_year(timestamp: i64) -> i32 { + (DateTime::from_timestamp_micros(timestamp) + .unwrap() + .signed_duration_since(DateTime::from_timestamp(0, 0).unwrap()) + .num_days() + / 365) as i32 Review Comment: This is incorrect, leap year maybe not 365 days. We should use `NaiveDate::years_since` ########## crates/iceberg/src/transform/temporal.rs: ########## @@ -39,6 +38,17 @@ const UNIX_EPOCH_YEAR: i32 = 1970; #[derive(Debug)] pub struct Year; +impl Year { + #[inline] + fn timestamp_to_year(timestamp: i64) -> i32 { + (DateTime::from_timestamp_micros(timestamp) + .unwrap() + .signed_duration_since(DateTime::from_timestamp(0, 0).unwrap()) + .num_days() + / 365) as i32 Review Comment: Or you can use [`NaiveTime::year`](https://docs.rs/chrono/latest/chrono/trait.Datelike.html#tymethod.year) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org