Xuanwo commented on code in PR #794: URL: https://github.com/apache/iceberg-rust/pull/794#discussion_r1884853758
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -128,7 +130,61 @@ pub struct ManifestWriter { key_metadata: Vec<u8>, - field_summary: HashMap<i32, FieldSummary>, + partitions: Vec<Struct>, +} + +struct PartitionFieldStats { + partition_type: PrimitiveType, + summary: FieldSummary, +} + +impl PartitionFieldStats { + pub(crate) fn new(partition_type: PrimitiveType) -> Self { + Self { + partition_type, + summary: Default::default(), + } + } + + pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> Result<()> { + if let Some(value) = value { Review Comment: Hi, how about using the following pattern to make the code more readable? ```rust let Some(value) = value else { self.summary.contains_null = true; return } ``` ########## crates/iceberg/src/spec/manifest_list.rs: ########## @@ -857,12 +857,22 @@ pub(super) mod _serde { contains_nan: self.contains_nan, lower_bound: self .lower_bound - .map(|v| Datum::try_from_bytes(&v, r#type.clone())) - .transpose()?, + .as_ref() + .map(|v| Datum::try_from_bytes(v, r#type.clone())) + .transpose() + .map_err(|err| { + err.with_context("type", format!("{:?}", r#type)) + .with_context("bytes", format!("{:?}", self.lower_bound)) Review Comment: `bytes` could be large and is usually not human-readable; how about omitting it from the error? 
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -128,7 +130,61 @@ pub struct ManifestWriter { key_metadata: Vec<u8>, - field_summary: HashMap<i32, FieldSummary>, + partitions: Vec<Struct>, +} + +struct PartitionFieldStats { + partition_type: PrimitiveType, + summary: FieldSummary, +} + +impl PartitionFieldStats { + pub(crate) fn new(partition_type: PrimitiveType) -> Self { + Self { + partition_type, + summary: Default::default(), + } + } + + pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> Result<()> { + if let Some(value) = value { + if !self.partition_type.compatible(&value) { + return Err(Error::new( + ErrorKind::DataInvalid, + "value is not compatitable with type", + )); + } + let value = Datum::new(self.partition_type.clone(), value); + if value.is_nan() { + self.summary.contains_nan = Some(true); Review Comment: How about using early returns to make the code more readable? It's hard for me to tell which `if` I'm in. ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -128,7 +130,61 @@ pub struct ManifestWriter { key_metadata: Vec<u8>, - field_summary: HashMap<i32, FieldSummary>, + partitions: Vec<Struct>, +} + +struct PartitionFieldStats { + partition_type: PrimitiveType, + summary: FieldSummary, +} + +impl PartitionFieldStats { + pub(crate) fn new(partition_type: PrimitiveType) -> Self { + Self { + partition_type, + summary: Default::default(), Review Comment: Better to use `FieldSummary::default()` to make it more readable. 
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -128,7 +130,61 @@ pub struct ManifestWriter { key_metadata: Vec<u8>, - field_summary: HashMap<i32, FieldSummary>, + partitions: Vec<Struct>, +} + +struct PartitionFieldStats { + partition_type: PrimitiveType, + summary: FieldSummary, +} + +impl PartitionFieldStats { + pub(crate) fn new(partition_type: PrimitiveType) -> Self { + Self { + partition_type, + summary: Default::default(), + } + } + + pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> Result<()> { + if let Some(value) = value { + if !self.partition_type.compatible(&value) { + return Err(Error::new( + ErrorKind::DataInvalid, + "value is not compatitable with type", + )); + } + let value = Datum::new(self.partition_type.clone(), value); Review Comment: I'm a bit confused why `PrimitiveType` is not `Copy`? ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -128,7 +130,61 @@ pub struct ManifestWriter { key_metadata: Vec<u8>, - field_summary: HashMap<i32, FieldSummary>, + partitions: Vec<Struct>, +} + +struct PartitionFieldStats { + partition_type: PrimitiveType, + summary: FieldSummary, +} + +impl PartitionFieldStats { + pub(crate) fn new(partition_type: PrimitiveType) -> Self { + Self { + partition_type, + summary: Default::default(), + } + } + + pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> Result<()> { + if let Some(value) = value { + if !self.partition_type.compatible(&value) { + return Err(Error::new( + ErrorKind::DataInvalid, + "value is not compatitable with type", + )); + } + let value = Datum::new(self.partition_type.clone(), value); + if value.is_nan() { + self.summary.contains_nan = Some(true); + } else { + if let Some(lower) = self.summary.lower_bound.as_mut() { + if value < *lower { + *lower = value.clone(); + } + } else { + self.summary.lower_bound = Some(value.clone()); + } + if let Some(upper) = self.summary.upper_bound.as_mut() { + if value > *upper { + *upper = value; + } + } else 
{ + self.summary.upper_bound = Some(value); + } + } Review Comment: How about using: ```rust self.summary.upper_bound = Some(self.summary.upper_bound.map_or(value, |u| u.max(value))); ``` ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -1518,6 +1540,7 @@ mod _serde { #[cfg(test)] mod tests { + use core::f32; Review Comment: I'm guessing we don't need this? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org