liurenjie1024 commented on code in PR #277:
URL: https://github.com/apache/iceberg-rust/pull/277#discussion_r1572416327


##########
crates/iceberg/src/arrow/schema.rs:
##########
@@ -385,25 +389,236 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
     }
 }
 
+struct ToArrowSchemaConverter;
+
+enum ArrowSchemaOrFieldOrType {
+    Schema(ArrowSchema),
+    Field(Field),
+    Type(DataType),
+}
+
+impl SchemaVisitor for ToArrowSchemaConverter {
+    type T = ArrowSchemaOrFieldOrType;
+
+    fn schema(
+        &mut self,
+        _schema: &crate::spec::Schema,
+        value: ArrowSchemaOrFieldOrType,
+    ) -> crate::Result<ArrowSchemaOrFieldOrType> {
+        let struct_type = match value {
+            ArrowSchemaOrFieldOrType::Type(DataType::Struct(fields)) => fields,
+            _ => unreachable!(),
+        };
+        Ok(ArrowSchemaOrFieldOrType::Schema(ArrowSchema::new(
+            struct_type,
+        )))
+    }
+
+    fn field(
+        &mut self,
+        field: &crate::spec::NestedFieldRef,
+        value: ArrowSchemaOrFieldOrType,
+    ) -> crate::Result<ArrowSchemaOrFieldOrType> {
+        let ty = match value {
+            ArrowSchemaOrFieldOrType::Type(ty) => ty,
+            _ => unreachable!(),
+        };
+        let mut metadata = HashMap::new();
+        metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), 
field.id.to_string());
+        if let Some(doc) = &field.doc {
+            metadata.insert(ARROW_FIELD_DOC_KEY.to_string(), doc.clone());
+        }
+        Ok(ArrowSchemaOrFieldOrType::Field(
+            Field::new(field.name.clone(), ty, 
!field.required).with_metadata(metadata),
+        ))
+    }
+
+    fn r#struct(
+        &mut self,
+        _: &crate::spec::StructType,
+        results: Vec<ArrowSchemaOrFieldOrType>,
+    ) -> crate::Result<ArrowSchemaOrFieldOrType> {
+        let fields = results
+            .into_iter()
+            .map(|result| match result {
+                ArrowSchemaOrFieldOrType::Field(field) => field,
+                _ => unreachable!(),
+            })
+            .collect();
+        Ok(ArrowSchemaOrFieldOrType::Type(DataType::Struct(fields)))
+    }
+
+    fn list(
+        &mut self,
+        list: &crate::spec::ListType,
+        value: ArrowSchemaOrFieldOrType,
+    ) -> crate::Result<Self::T> {
+        let field = match self.field(&list.element_field, value)? {
+            ArrowSchemaOrFieldOrType::Field(field) => field,
+            _ => unreachable!(),
+        };
+        let mut meta = HashMap::new();
+        meta.insert(
+            PARQUET_FIELD_ID_META_KEY.to_string(),
+            list.element_field.id.to_string(),
+        );
+        if let Some(doc) = &list.element_field.doc {
+            meta.insert(ARROW_FIELD_DOC_KEY.to_string(), doc.clone());
+        }
+        let field = field.with_metadata(meta);
+        Ok(ArrowSchemaOrFieldOrType::Type(DataType::List(Arc::new(
+            field,
+        ))))
+    }
+
+    fn map(
+        &mut self,
+        map: &crate::spec::MapType,
+        key_value: ArrowSchemaOrFieldOrType,
+        value: ArrowSchemaOrFieldOrType,
+    ) -> crate::Result<ArrowSchemaOrFieldOrType> {
+        let key_field = match self.field(&map.key_field, key_value)? {
+            ArrowSchemaOrFieldOrType::Field(field) => field,
+            _ => unreachable!(),
+        };
+        let value_field = match self.field(&map.value_field, value)? {
+            ArrowSchemaOrFieldOrType::Field(field) => field,
+            _ => unreachable!(),
+        };
+        let field = Field::new(
+            "entries",
+            DataType::Struct(vec![key_field, value_field].into()),
+            map.value_field.required,
+        );
+
+        Ok(ArrowSchemaOrFieldOrType::Type(DataType::Map(
+            field.into(),
+            false,
+        )))
+    }
+
+    fn primitive(
+        &mut self,
+        p: &crate::spec::PrimitiveType,
+    ) -> crate::Result<ArrowSchemaOrFieldOrType> {
+        match p {
+            crate::spec::PrimitiveType::Boolean => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Boolean))
+            }
+            crate::spec::PrimitiveType::Int => 
Ok(ArrowSchemaOrFieldOrType::Type(DataType::Int32)),
+            crate::spec::PrimitiveType::Long => 
Ok(ArrowSchemaOrFieldOrType::Type(DataType::Int64)),
+            crate::spec::PrimitiveType::Float => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Float32))
+            }
+            crate::spec::PrimitiveType::Double => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Float64))
+            }
+            crate::spec::PrimitiveType::Decimal { precision, scale } => {
+                let (precision, scale) = {
+                    let precision: u8 = 
precision.to_owned().try_into().map_err(|err| {
+                        Error::new(
+                            crate::ErrorKind::DataInvalid,
+                            "incompatible precision for decimal type convert",
+                        )
+                        .with_source(err)
+                    })?;
+                    let scale = scale.to_owned().try_into().map_err(|err| {
+                        Error::new(
+                            crate::ErrorKind::DataInvalid,
+                            "incompatible scale for decimal type convert",
+                        )
+                        .with_source(err)
+                    })?;
+                    (precision, scale)
+                };
+                
validate_decimal_precision_and_scale::<Decimal128Type>(precision, 
scale).map_err(
+                    |err| {
+                        Error::new(
+                            crate::ErrorKind::DataInvalid,
+                            "incompatible precision and scale for decimal type 
convert",
+                        )
+                        .with_source(err)
+                    },
+                )?;
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Decimal128(
+                    precision, scale,
+                )))
+            }
+            crate::spec::PrimitiveType::Date => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Date32))
+            }
+            crate::spec::PrimitiveType::Time => 
Ok(ArrowSchemaOrFieldOrType::Type(
+                DataType::Time32(TimeUnit::Microsecond),
+            )),
+            crate::spec::PrimitiveType::Timestamp => 
Ok(ArrowSchemaOrFieldOrType::Type(
+                DataType::Timestamp(TimeUnit::Microsecond, None),
+            )),
+            crate::spec::PrimitiveType::Timestamptz => 
Ok(ArrowSchemaOrFieldOrType::Type(
+                // Timestampz always stored as UTC
+                DataType::Timestamp(TimeUnit::Microsecond, 
Some("+00:00".into())),
+            )),
+            crate::spec::PrimitiveType::String => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::Utf8))
+            }
+            crate::spec::PrimitiveType::Uuid => 
Ok(ArrowSchemaOrFieldOrType::Type(
+                DataType::FixedSizeBinary(16),
+            )),
+            crate::spec::PrimitiveType::Fixed(len) => 
Ok(ArrowSchemaOrFieldOrType::Type(
+                len.to_i32()
+                    .map(DataType::FixedSizeBinary)
+                    .unwrap_or(DataType::LargeBinary),
+            )),
+            crate::spec::PrimitiveType::Binary => {
+                Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary))

Review Comment:
   +1 for large binary. I think there is a difference between binary and 
string? Also again, there is no string type in arrow.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to