liurenjie1024 commented on code in PR #1116: URL: https://github.com/apache/iceberg-rust/pull/1116#discussion_r2046801281
########## crates/iceberg/src/spec/name_mapping/mod.rs: ########## @@ -17,14 +17,76 @@ //! Iceberg name mapping. +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DefaultOnNull}; +use crate::Error; + +/// Property name for name mapping. +pub const DEFAULT_SCHEMA_NAME_MAPPING: &str = "schema.name-mapping.default"; + /// Iceberg fallback field name to ID mapping. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] #[serde(transparent)] pub struct NameMapping { - pub root: Vec<MappedField>, + root: Vec<MappedField>, + #[serde(skip)] + name_to_id: HashMap<String, i32>, + #[serde(skip)] + id_to_field: HashMap<i32, MappedField>, Review Comment: ```suggestion id_to_field: HashMap<i32, Arc<MappedField>>, ``` `MappedField` may be deeply nested and expensive to copy. ########## crates/iceberg/src/spec/name_mapping/mod.rs: ########## @@ -17,14 +17,76 @@ //! Iceberg name mapping. +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DefaultOnNull}; +use crate::Error; + +/// Property name for name mapping. +pub const DEFAULT_SCHEMA_NAME_MAPPING: &str = "schema.name-mapping.default"; + /// Iceberg fallback field name to ID mapping. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] #[serde(transparent)] pub struct NameMapping { - pub root: Vec<MappedField>, + root: Vec<MappedField>, + #[serde(skip)] + name_to_id: HashMap<String, i32>, + #[serde(skip)] + id_to_field: HashMap<i32, MappedField>, +} + +impl NameMapping { + /// Create a new [`NameMapping`] given a collection of mapped fields. 
+ pub fn try_new(fields: Vec<MappedField>) -> Result<NameMapping, Error> { + let mut name_to_id = HashMap::new(); + let mut id_to_field = HashMap::new(); + + for field in &fields { + if let Some(id) = field.field_id() { + if id_to_field.contains_key(&id) { + return Err(Error::new( + crate::ErrorKind::DataInvalid, + format!("duplicate id '{id}' is not allowed"), + )); + } + + id_to_field.insert(id, field.clone()); + for name in field.names() { + if name_to_id.contains_key(name) { + return Err(Error::new( + crate::ErrorKind::DataInvalid, + format!("duplicate name '{name}' is not allowed"), + )); + } + name_to_id.insert(name.to_string(), id); + } + } + } Review Comment: This implementation is incorrect: the `id` and `name` lookups are built from the top-level fields only, but they should be global (covering nested fields as well), and we should build them using a visitor. I would suggest removing these two fields in this PR and adding them back later using a visitor. ########## crates/iceberg/src/spec/name_mapping/mod.rs: ########## @@ -33,17 +95,44 @@ pub struct NameMapping { #[serde(rename_all = "kebab-case")] pub struct MappedField { #[serde(skip_serializing_if = "Option::is_none")] - pub field_id: Option<i32>, - pub names: Vec<String>, + field_id: Option<i32>, + names: Vec<String>, #[serde(default)] #[serde(skip_serializing_if = "Vec::is_empty")] #[serde_as(deserialize_as = "DefaultOnNull")] - pub fields: Vec<MappedField>, Review Comment: ```suggestion fields: Vec<Arc<MappedField>>, ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org