Xuanwo commented on code in PR #794:
URL: https://github.com/apache/iceberg-rust/pull/794#discussion_r1884853758


##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -128,7 +130,61 @@ pub struct ManifestWriter {
 
     key_metadata: Vec<u8>,
 
-    field_summary: HashMap<i32, FieldSummary>,
+    partitions: Vec<Struct>,
+}
+
+struct PartitionFieldStats {
+    partition_type: PrimitiveType,
+    summary: FieldSummary,
+}
+
+impl PartitionFieldStats {
+    pub(crate) fn new(partition_type: PrimitiveType) -> Self {
+        Self {
+            partition_type,
+            summary: Default::default(),
+        }
+    }
+
+    pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> 
Result<()> {
+        if let Some(value) = value {

Review Comment:
   Hi, how about using the following pattern to make the code more readable?
   
   ```rust
   let Some(value) = value else {
       self.summary.contains_null = true;
       return Ok(());
   };
   ```



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -857,12 +857,22 @@ pub(super) mod _serde {
                 contains_nan: self.contains_nan,
                 lower_bound: self
                     .lower_bound
-                    .map(|v| Datum::try_from_bytes(&v, r#type.clone()))
-                    .transpose()?,
+                    .as_ref()
+                    .map(|v| Datum::try_from_bytes(v, r#type.clone()))
+                    .transpose()
+                    .map_err(|err| {
+                        err.with_context("type", format!("{:?}", r#type))
+                            .with_context("bytes", format!("{:?}", 
self.lower_bound))

Review Comment:
   `bytes` could be large and is usually not human-readable; how about omitting 
it from the error?



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -128,7 +130,61 @@ pub struct ManifestWriter {
 
     key_metadata: Vec<u8>,
 
-    field_summary: HashMap<i32, FieldSummary>,
+    partitions: Vec<Struct>,
+}
+
+struct PartitionFieldStats {
+    partition_type: PrimitiveType,
+    summary: FieldSummary,
+}
+
+impl PartitionFieldStats {
+    pub(crate) fn new(partition_type: PrimitiveType) -> Self {
+        Self {
+            partition_type,
+            summary: Default::default(),
+        }
+    }
+
+    pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> 
Result<()> {
+        if let Some(value) = value {
+            if !self.partition_type.compatible(&value) {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    "value is not compatitable with type",
+                ));
+            }
+            let value = Datum::new(self.partition_type.clone(), value);
+            if value.is_nan() {
+                self.summary.contains_nan = Some(true);

Review Comment:
   How about using early returns to make the code more readable? It's hard for 
me to tell which `if` I'm in.



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -128,7 +130,61 @@ pub struct ManifestWriter {
 
     key_metadata: Vec<u8>,
 
-    field_summary: HashMap<i32, FieldSummary>,
+    partitions: Vec<Struct>,
+}
+
+struct PartitionFieldStats {
+    partition_type: PrimitiveType,
+    summary: FieldSummary,
+}
+
+impl PartitionFieldStats {
+    pub(crate) fn new(partition_type: PrimitiveType) -> Self {
+        Self {
+            partition_type,
+            summary: Default::default(),

Review Comment:
   Better to use `FieldSummary::default()` to make it more readable.



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -128,7 +130,61 @@ pub struct ManifestWriter {
 
     key_metadata: Vec<u8>,
 
-    field_summary: HashMap<i32, FieldSummary>,
+    partitions: Vec<Struct>,
+}
+
+struct PartitionFieldStats {
+    partition_type: PrimitiveType,
+    summary: FieldSummary,
+}
+
+impl PartitionFieldStats {
+    pub(crate) fn new(partition_type: PrimitiveType) -> Self {
+        Self {
+            partition_type,
+            summary: Default::default(),
+        }
+    }
+
+    pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> 
Result<()> {
+        if let Some(value) = value {
+            if !self.partition_type.compatible(&value) {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    "value is not compatitable with type",
+                ));
+            }
+            let value = Datum::new(self.partition_type.clone(), value);

Review Comment:
   I'm a bit confused why `PrimitiveType` is not `Copy`?



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -128,7 +130,61 @@ pub struct ManifestWriter {
 
     key_metadata: Vec<u8>,
 
-    field_summary: HashMap<i32, FieldSummary>,
+    partitions: Vec<Struct>,
+}
+
+struct PartitionFieldStats {
+    partition_type: PrimitiveType,
+    summary: FieldSummary,
+}
+
+impl PartitionFieldStats {
+    pub(crate) fn new(partition_type: PrimitiveType) -> Self {
+        Self {
+            partition_type,
+            summary: Default::default(),
+        }
+    }
+
+    pub(crate) fn update(&mut self, value: Option<PrimitiveLiteral>) -> 
Result<()> {
+        if let Some(value) = value {
+            if !self.partition_type.compatible(&value) {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    "value is not compatitable with type",
+                ));
+            }
+            let value = Datum::new(self.partition_type.clone(), value);
+            if value.is_nan() {
+                self.summary.contains_nan = Some(true);
+            } else {
+                if let Some(lower) = self.summary.lower_bound.as_mut() {
+                    if value < *lower {
+                        *lower = value.clone();
+                    }
+                } else {
+                    self.summary.lower_bound = Some(value.clone());
+                }
+                if let Some(upper) = self.summary.upper_bound.as_mut() {
+                    if value > *upper {
+                        *upper = value;
+                    }
+                } else {
+                    self.summary.upper_bound = Some(value);
+                }
+            }

Review Comment:
   How about using:
   
   ```rust
   self.summary.upper_bound = Some(self.summary.upper_bound.map_or(value, |u| 
u.max(value)));
   ```



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -1518,6 +1540,7 @@ mod _serde {
 
 #[cfg(test)]
 mod tests {
+    use core::f32;

Review Comment:
   I'm guessing we don't need this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to