liurenjie1024 commented on code in PR #1072: URL: https://github.com/apache/iceberg-rust/pull/1072#discussion_r1992654031
########## crates/iceberg/src/spec/name_mapping.rs: ########## @@ -33,14 +44,631 @@ pub struct NameMapping { #[serde(rename_all = "kebab-case")] pub struct MappedField { #[serde(skip_serializing_if = "Option::is_none")] + /// Unique identifier for field. pub field_id: Option<i32>, + + /// Contains multiple names to map to a field (for schema evolution). pub names: Vec<String>, + #[serde(default)] #[serde(skip_serializing_if = "Vec::is_empty")] #[serde_as(deserialize_as = "DefaultOnNull")] + /// Holds mappings for nested fields. pub fields: Vec<MappedField>, } +impl MappedField { + fn new(names: Vec<String>, fields: Vec<MappedField>, field_id: Option<i32>) -> Self { + Self { + field_id, + names, + fields, + } + } +} + +impl NameMapping { + /// Parses name_mapping from JSON. + pub fn parse_name_mapping(name_mapping: &str) -> Result<Self> { + let parsed_name_mapping: NameMapping = serde_json::from_str(name_mapping)?; + Ok(parsed_name_mapping) + } + + /// Creates a mapping from a schema + pub fn create_mapping_from_schema(schema: &Schema) -> NameMapping { + let mapped_fields = visit_schema(schema, &mut CreateMapping).unwrap(); + + NameMapping { + root: mapped_fields, + } + } + + /// Updates an existing name mapping with new updates and additions. + pub fn update_mapping( + mapping: NameMapping, + updates: HashMap<i32, NestedField>, + adds: HashMap<i32, Vec<NestedField>>, + ) -> NameMapping { + let visitor = UpdateMapping::new(updates, adds); + let updated_root = visit_name_mapping(&mapping, &visitor); + + NameMapping { root: updated_root } + } + + /// Returns an index mapping names to `MappedField`` by visiting the schema. + pub fn index_by_name(schema: &Schema) -> HashMap<String, MappedField> { + let mapping = Self::create_mapping_from_schema(schema); + visit_name_mapping(&mapping, &IndexByName) + } + + /// Applies the name mapping to the given schema. + pub fn apply_name_mapping(schema: &Schema, mapping: &NameMapping) -> Result<Type> { Review Comment: I'm confusing about this method, is there any case to demonstrate its usage? ########## crates/iceberg/src/spec/name_mapping.rs: ########## @@ -17,13 +17,24 @@ //! Iceberg name mapping. +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DefaultOnNull}; +use crate::spec::schema::{PartnerAccessor, Schema, SchemaVisitor, SchemaWithPartnerVisitor}; +use crate::spec::{ + visit_schema, visit_schema_with_partner, visit_type, ListType, MapType, NestedField, + NestedFieldRef, PrimitiveType, StructType, Type, +}; +use crate::{Error, ErrorKind, Result}; + /// Iceberg fallback field name to ID mapping. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] #[serde(transparent)] pub struct NameMapping { + /// Holds mapped fields Review Comment: Seems we lack a `MappedFields` field like [java one](https://github.com/apache/iceberg/blob/c07f2aabc0a1d02f068ecf1514d2479c0fbdd3b0/core/src/main/java/org/apache/iceberg/mapping/MappedFields.java#L29) ########## crates/iceberg/src/spec/name_mapping.rs: ########## @@ -33,14 +44,631 @@ pub struct NameMapping { #[serde(rename_all = "kebab-case")] pub struct MappedField { #[serde(skip_serializing_if = "Option::is_none")] + /// Unique identifier for field. pub field_id: Option<i32>, + + /// Contains multiple names to map to a field (for schema evolution). pub names: Vec<String>, + #[serde(default)] #[serde(skip_serializing_if = "Vec::is_empty")] #[serde_as(deserialize_as = "DefaultOnNull")] + /// Holds mappings for nested fields. pub fields: Vec<MappedField>, } +impl MappedField { + fn new(names: Vec<String>, fields: Vec<MappedField>, field_id: Option<i32>) -> Self { + Self { + field_id, + names, + fields, + } + } +} + +impl NameMapping { + /// Parses name_mapping from JSON. + pub fn parse_name_mapping(name_mapping: &str) -> Result<Self> { + let parsed_name_mapping: NameMapping = serde_json::from_str(name_mapping)?; + Ok(parsed_name_mapping) + } + + /// Creates a mapping from a schema + pub fn create_mapping_from_schema(schema: &Schema) -> NameMapping { + let mapped_fields = visit_schema(schema, &mut CreateMapping).unwrap(); + + NameMapping { + root: mapped_fields, + } + } + + /// Updates an existing name mapping with new updates and additions. + pub fn update_mapping( + mapping: NameMapping, + updates: HashMap<i32, NestedField>, + adds: HashMap<i32, Vec<NestedField>>, + ) -> NameMapping { + let visitor = UpdateMapping::new(updates, adds); + let updated_root = visit_name_mapping(&mapping, &visitor); + + NameMapping { root: updated_root } + } + + /// Returns an index mapping names to `MappedField`` by visiting the schema. + pub fn index_by_name(schema: &Schema) -> HashMap<String, MappedField> { Review Comment: I don't think we should expose this method to user, the index should be constructed lazily. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org