ZENOTME commented on code in PR #277: URL: https://github.com/apache/iceberg-rust/pull/277#discussion_r1547039753
########## crates/iceberg/src/arrow.rs: ########## @@ -106,3 +114,224 @@ impl ArrowReader { ProjectionMask::all() } } + +/// The key of column id in the metadata of arrow field. +pub const COLUMN_ID_META_KEY: &str = "column_id"; +/// The key of doc in the metadata of arrow field. +pub const DOC: &str = "doc"; + +struct ToArrowSchemaConverter; + +enum ArrowSchemaOrFieldOrType { + Schema(ArrowSchema), + Field(ArrowFieldRef), + Type(ArrowType), +} + +impl SchemaVisitor for ToArrowSchemaConverter { + type T = ArrowSchemaOrFieldOrType; + + fn schema(&mut self, _schema: &crate::spec::Schema, value: Self::T) -> crate::Result<Self::T> { + let struct_type = match value { + ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)) => fields, + _ => unreachable!(), + }; + Ok(ArrowSchemaOrFieldOrType::Schema(ArrowSchema::new( + struct_type, + ))) + } + + fn field( + &mut self, + field: &crate::spec::NestedFieldRef, + value: Self::T, + ) -> crate::Result<Self::T> { + let ty = match value { + ArrowSchemaOrFieldOrType::Type(ty) => ty, + _ => unreachable!(), + }; + let mut metadata = HashMap::new(); + metadata.insert(COLUMN_ID_META_KEY.to_string(), field.id.to_string()); + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field.id.to_string()); + if let Some(doc) = &field.doc { + metadata.insert(DOC.to_string(), doc.clone()); + } + Ok(ArrowSchemaOrFieldOrType::Field( + ArrowField::new(field.name.clone(), ty, !field.required) + .with_metadata(metadata) + .into(), + )) + } + + fn r#struct( + &mut self, + _: &crate::spec::StructType, + results: Vec<Self::T>, + ) -> crate::Result<Self::T> { + let fields = results + .into_iter() + .map(|result| match result { + ArrowSchemaOrFieldOrType::Field(field) => field, + _ => unreachable!(), + }) + .collect(); + Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields))) + } + + fn list(&mut self, list: &crate::spec::ListType, value: Self::T) -> crate::Result<Self::T> { + let field = match self.field(&list.element_field, value)? 
{ + ArrowSchemaOrFieldOrType::Field(field) => field, + _ => unreachable!(), + }; + Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::List(field))) + } + + fn map( + &mut self, + map: &crate::spec::MapType, + key_value: Self::T, + value: Self::T, + ) -> crate::Result<Self::T> { + let key_field = match self.field(&map.key_field, key_value)? { Review Comment: https://github.com/apache/iceberg-rust/blob/83cdff48f3ff4ab0bc6d2b39ce41a65ace4ee26b/crates/iceberg/src/spec/schema.rs#L344 because the `key_value` and `value` here are produced by `visit_type`. Maybe naming them `key_type` and `value_type` would be better. 🤔 ########## crates/iceberg/src/arrow.rs: ########## @@ -106,3 +114,224 @@ impl ArrowReader { ProjectionMask::all() } } + +/// The key of column id in the metadata of arrow field. +pub const COLUMN_ID_META_KEY: &str = "column_id"; +/// The key of doc in the metadata of arrow field. +pub const DOC: &str = "doc"; + +struct ToArrowSchemaConverter; + +enum ArrowSchemaOrFieldOrType { + Schema(ArrowSchema), + Field(ArrowFieldRef), + Type(ArrowType), +} + +impl SchemaVisitor for ToArrowSchemaConverter { + type T = ArrowSchemaOrFieldOrType; + + fn schema(&mut self, _schema: &crate::spec::Schema, value: Self::T) -> crate::Result<Self::T> { + let struct_type = match value { + ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)) => fields, + _ => unreachable!(), + }; + Ok(ArrowSchemaOrFieldOrType::Schema(ArrowSchema::new( + struct_type, + ))) + } + + fn field( + &mut self, + field: &crate::spec::NestedFieldRef, + value: Self::T, + ) -> crate::Result<Self::T> { + let ty = match value { + ArrowSchemaOrFieldOrType::Type(ty) => ty, + _ => unreachable!(), + }; + let mut metadata = HashMap::new(); + metadata.insert(COLUMN_ID_META_KEY.to_string(), field.id.to_string()); + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field.id.to_string()); + if let Some(doc) = &field.doc { + metadata.insert(DOC.to_string(), doc.clone()); + } + Ok(ArrowSchemaOrFieldOrType::Field( + 
ArrowField::new(field.name.clone(), ty, !field.required) + .with_metadata(metadata) + .into(), + )) + } + + fn r#struct( + &mut self, + _: &crate::spec::StructType, + results: Vec<Self::T>, + ) -> crate::Result<Self::T> { + let fields = results + .into_iter() + .map(|result| match result { + ArrowSchemaOrFieldOrType::Field(field) => field, + _ => unreachable!(), + }) + .collect(); + Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields))) + } + + fn list(&mut self, list: &crate::spec::ListType, value: Self::T) -> crate::Result<Self::T> { + let field = match self.field(&list.element_field, value)? { + ArrowSchemaOrFieldOrType::Field(field) => field, + _ => unreachable!(), + }; + Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::List(field))) + } + + fn map( + &mut self, + map: &crate::spec::MapType, + key_value: Self::T, + value: Self::T, + ) -> crate::Result<Self::T> { + let key_field = match self.field(&map.key_field, key_value)? { Review Comment: https://github.com/apache/iceberg-rust/blob/83cdff48f3ff4ab0bc6d2b39ce41a65ace4ee26b/crates/iceberg/src/spec/schema.rs#L344 because the `key_value` and `value` here are produced by `visit_type`. Maybe naming them `key_type` and `value_type` would be better. 🤔 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org