ZENOTME commented on code in PR #277:
URL: https://github.com/apache/iceberg-rust/pull/277#discussion_r1547039753


##########
crates/iceberg/src/arrow.rs:
##########
@@ -106,3 +114,224 @@ impl ArrowReader {
         ProjectionMask::all()
     }
 }
+
+/// The key of column id in the metadata of arrow field.
+pub const COLUMN_ID_META_KEY: &str = "column_id";
+/// The key of doc in the metadata of arrow field.
+pub const DOC: &str = "doc";
+
+struct ToArrowSchemaConverter;
+
+enum ArrowSchemaOrFieldOrType {
+    Schema(ArrowSchema),
+    Field(ArrowFieldRef),
+    Type(ArrowType),
+}
+
+impl SchemaVisitor for ToArrowSchemaConverter {
+    type T = ArrowSchemaOrFieldOrType;
+
+    fn schema(&mut self, _schema: &crate::spec::Schema, value: Self::T) -> 
crate::Result<Self::T> {
+        let struct_type = match value {
+            ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)) => 
fields,
+            _ => unreachable!(),
+        };
+        Ok(ArrowSchemaOrFieldOrType::Schema(ArrowSchema::new(
+            struct_type,
+        )))
+    }
+
+    fn field(
+        &mut self,
+        field: &crate::spec::NestedFieldRef,
+        value: Self::T,
+    ) -> crate::Result<Self::T> {
+        let ty = match value {
+            ArrowSchemaOrFieldOrType::Type(ty) => ty,
+            _ => unreachable!(),
+        };
+        let mut metadata = HashMap::new();
+        metadata.insert(COLUMN_ID_META_KEY.to_string(), field.id.to_string());
+        metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), 
field.id.to_string());
+        if let Some(doc) = &field.doc {
+            metadata.insert(DOC.to_string(), doc.clone());
+        }
+        Ok(ArrowSchemaOrFieldOrType::Field(
+            ArrowField::new(field.name.clone(), ty, !field.required)
+                .with_metadata(metadata)
+                .into(),
+        ))
+    }
+
+    fn r#struct(
+        &mut self,
+        _: &crate::spec::StructType,
+        results: Vec<Self::T>,
+    ) -> crate::Result<Self::T> {
+        let fields = results
+            .into_iter()
+            .map(|result| match result {
+                ArrowSchemaOrFieldOrType::Field(field) => field,
+                _ => unreachable!(),
+            })
+            .collect();
+        Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)))
+    }
+
+    fn list(&mut self, list: &crate::spec::ListType, value: Self::T) -> 
crate::Result<Self::T> {
+        let field = match self.field(&list.element_field, value)? {
+            ArrowSchemaOrFieldOrType::Field(field) => field,
+            _ => unreachable!(),
+        };
+        Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::List(field)))
+    }
+
+    fn map(
+        &mut self,
+        map: &crate::spec::MapType,
+        key_value: Self::T,
+        value: Self::T,
+    ) -> crate::Result<Self::T> {
+        let key_field = match self.field(&map.key_field, key_value)? {

Review Comment:
   
https://github.com/apache/iceberg-rust/blob/83cdff48f3ff4ab0bc6d2b39ce41a65ace4ee26b/crates/iceberg/src/spec/schema.rs#L344
   The `key_value` and `value` parameters here are produced by `visit_type`, so 
maybe naming them `key_type` and `value_type` would be better. 🤔



##########
crates/iceberg/src/arrow.rs:
##########
@@ -106,3 +114,224 @@ impl ArrowReader {
         ProjectionMask::all()
     }
 }
+
+/// The key of column id in the metadata of arrow field.
+pub const COLUMN_ID_META_KEY: &str = "column_id";
+/// The key of doc in the metadata of arrow field.
+pub const DOC: &str = "doc";
+
+struct ToArrowSchemaConverter;
+
+enum ArrowSchemaOrFieldOrType {
+    Schema(ArrowSchema),
+    Field(ArrowFieldRef),
+    Type(ArrowType),
+}
+
+impl SchemaVisitor for ToArrowSchemaConverter {
+    type T = ArrowSchemaOrFieldOrType;
+
+    fn schema(&mut self, _schema: &crate::spec::Schema, value: Self::T) -> 
crate::Result<Self::T> {
+        let struct_type = match value {
+            ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)) => 
fields,
+            _ => unreachable!(),
+        };
+        Ok(ArrowSchemaOrFieldOrType::Schema(ArrowSchema::new(
+            struct_type,
+        )))
+    }
+
+    fn field(
+        &mut self,
+        field: &crate::spec::NestedFieldRef,
+        value: Self::T,
+    ) -> crate::Result<Self::T> {
+        let ty = match value {
+            ArrowSchemaOrFieldOrType::Type(ty) => ty,
+            _ => unreachable!(),
+        };
+        let mut metadata = HashMap::new();
+        metadata.insert(COLUMN_ID_META_KEY.to_string(), field.id.to_string());
+        metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), 
field.id.to_string());
+        if let Some(doc) = &field.doc {
+            metadata.insert(DOC.to_string(), doc.clone());
+        }
+        Ok(ArrowSchemaOrFieldOrType::Field(
+            ArrowField::new(field.name.clone(), ty, !field.required)
+                .with_metadata(metadata)
+                .into(),
+        ))
+    }
+
+    fn r#struct(
+        &mut self,
+        _: &crate::spec::StructType,
+        results: Vec<Self::T>,
+    ) -> crate::Result<Self::T> {
+        let fields = results
+            .into_iter()
+            .map(|result| match result {
+                ArrowSchemaOrFieldOrType::Field(field) => field,
+                _ => unreachable!(),
+            })
+            .collect();
+        Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::Struct(fields)))
+    }
+
+    fn list(&mut self, list: &crate::spec::ListType, value: Self::T) -> 
crate::Result<Self::T> {
+        let field = match self.field(&list.element_field, value)? {
+            ArrowSchemaOrFieldOrType::Field(field) => field,
+            _ => unreachable!(),
+        };
+        Ok(ArrowSchemaOrFieldOrType::Type(ArrowType::List(field)))
+    }
+
+    fn map(
+        &mut self,
+        map: &crate::spec::MapType,
+        key_value: Self::T,
+        value: Self::T,
+    ) -> crate::Result<Self::T> {
+        let key_field = match self.field(&map.key_field, key_value)? {

Review Comment:
   
https://github.com/apache/iceberg-rust/blob/83cdff48f3ff4ab0bc6d2b39ce41a65ace4ee26b/crates/iceberg/src/spec/schema.rs#L344
   The `key_value` and `value` parameters here are produced by `visit_type`, so 
maybe naming them `key_type` and `value_type` would be better. 🤔



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to