hsingh574 commented on code in PR #1172: URL: https://github.com/apache/iceberg-rust/pull/1172#discussion_r2110630320
########## crates/iceberg/src/spec/schema/update.rs: ########## @@ -0,0 +1,464 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::{HashMap, HashSet}; + +use super::{NestedField, SchemaRef}; +use crate::spec::Type; +use crate::transaction::Transaction; +use crate::{Error, ErrorKind}; + +pub const TABLE_ROOT_ID: i32 = -1; + +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MoveOperation { + First, + Before, + After, +} + +#[allow(dead_code)] +#[derive(Debug)] +pub struct Move { + field_id: i32, + full_name: String, + other_field_id: Option<i32>, + op: MoveOperation, +} + +#[allow(dead_code)] +pub struct UpdateSchema<'a> { + transaction: Transaction<'a>, + schema: SchemaRef, + adds: HashMap<i32, Vec<NestedField>>, + deletes: HashSet<i32>, + updates: HashMap<i32, NestedField>, + moves: HashMap<i32, Vec<Move>>, + added_name_to_id: HashMap<String, i32>, + last_column_id: i32, + identifier_field_ids: HashSet<i32>, + case_sensitive: bool, + allow_incompatible_changes: bool, +} + +#[allow(dead_code)] +impl<'a> UpdateSchema<'a> { + pub fn new( + transaction: Transaction<'a>, + allow_incompatible_changes: bool, + case_sensitive: bool, + schema: Option<SchemaRef>, + ) -> Self { + let current_schema = schema.unwrap_or_else(|| { + transaction + .current_table() + .metadata() + .current_schema() + .clone() + }); + let last_column_id = current_schema.highest_field_id() + 1; + + UpdateSchema { + transaction, + schema: current_schema.clone(), + adds: HashMap::new(), + deletes: HashSet::new(), + updates: HashMap::new(), + moves: HashMap::new(), + added_name_to_id: HashMap::new(), + identifier_field_ids: current_schema + .identifier_field_ids() + .collect::<HashSet<i32>>(), + last_column_id, + case_sensitive, + allow_incompatible_changes, + } + } + + /// Adds a new column to a nested struct or a new top-level column. + /// + /// Because `"."` may be interpreted as a column path separator or used in field names, + /// it is not allowed to add a nested column by passing in a string. To add to nested + /// structures or to add fields with names that contain `"."`, use a tuple instead to + /// indicate the path. + /// + /// If the type is a nested type, its field IDs are reassigned when added to the existing + /// schema. + /// + /// # Arguments + /// + /// * `path` - Name for the new column. + /// * `field_type` - Type for the new column. + /// * `doc` - Documentation string for the new column. + /// * `required` - Whether the new column is required. + /// + /// # Returns + /// + /// This method returns a reference to `Self` to allow for method chaining. + fn add_column( + &mut self, + column_name: Vec<String>, + field_type: Type, + doc: Option<String>, + required: bool, + ) -> Result<&mut Self, Error> { + if column_name.is_empty() { + return Err(Error::new( + ErrorKind::DataInvalid, + "Cannot add column without name.", + )); + } + + for name in &column_name { + if name.contains('.') { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Cannot add column with ambiguous name: {}, provide a vector of names without periods", + name + ), + )); + } + } + + if required && !self.allow_incompatible_changes { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Cannot add column because there is no initial value", + )); + } + + let name = column_name.last().unwrap(); + let parent = column_name[..column_name.len() - 1].to_vec(); + + let full_name = column_name.join("."); + let parent_full_path = parent.join("."); + let mut parent_id: i32 = TABLE_ROOT_ID; + + if let Some(existing_field) = if self.case_sensitive { + self.schema.field_by_name(&full_name) + } else { + self.schema.field_by_name_case_insensitive(&full_name) + } { + if !self.deletes.contains(&existing_field.id) { + return Err(Error::new( + crate::ErrorKind::DataInvalid, + format!("Cannot add column {}, to non-struct type.", name), Review Comment: [nit] wrong error message -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org