flaneur2020 commented on code in PR #871:
URL: https://github.com/apache/iceberg-rust/pull/871#discussion_r1901836558


##########
crates/iceberg/src/metadata_scan.rs:
##########
@@ -134,44 +137,122 @@ pub struct ManifestsTable<'a> {
 }
 
 impl<'a> ManifestsTable<'a> {
-    fn partition_summary_fields(&self) -> Vec<Field> {
-        vec![
-            Field::new("contains_null", DataType::Boolean, false),
-            Field::new("contains_nan", DataType::Boolean, true),
-            Field::new("lower_bound", DataType::Utf8, true),
-            Field::new("upper_bound", DataType::Utf8, true),
-        ]
-    }
-
-    /// Returns the schema of the manifests table.
-    pub fn schema(&self) -> Schema {
-        Schema::new(vec![
-            Field::new("content", DataType::Int8, false),
-            Field::new("path", DataType::Utf8, false),
-            Field::new("length", DataType::Int64, false),
-            Field::new("partition_spec_id", DataType::Int32, false),
-            Field::new("added_snapshot_id", DataType::Int64, false),
-            Field::new("added_data_files_count", DataType::Int32, false),
-            Field::new("existing_data_files_count", DataType::Int32, false),
-            Field::new("deleted_data_files_count", DataType::Int32, false),
-            Field::new("added_delete_files_count", DataType::Int32, false),
-            Field::new("existing_delete_files_count", DataType::Int32, false),
-            Field::new("deleted_delete_files_count", DataType::Int32, false),
-            Field::new(
+    /// Returns the iceberg schema of the manifests table.
+    pub fn schema(&self) -> crate::spec::Schema {
+        let fields = vec![
+            NestedField::new(14, "content", Type::Primitive(PrimitiveType::Int), true),
+            NestedField::new(1, "path", Type::Primitive(PrimitiveType::String), true),
+            NestedField::new(2, "length", Type::Primitive(PrimitiveType::Long), true),
+            NestedField::new(
+                3,
+                "partition_spec_id",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                4,
+                "added_snapshot_id",
+                Type::Primitive(PrimitiveType::Long),
+                true,
+            ),
+            NestedField::new(
+                5,
+                "added_data_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                6,
+                "existing_data_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                7,
+                "deleted_data_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                15,
+                "added_delete_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                16,
+                "existing_delete_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                17,
+                "deleted_delete_files_count",
+                Type::Primitive(PrimitiveType::Int),
+                true,
+            ),
+            NestedField::new(
+                8,
                 "partition_summaries",
-                DataType::List(Arc::new(Field::new_struct(
-                    "item",
-                    self.partition_summary_fields(),
-                    false,
-                ))),
-                false,
+                Type::List(ListType {
+                    element_field: Arc::new(NestedField::new(
+                        9,
+                        "item",
+                        Type::Struct(StructType::new(vec![
+                            Arc::new(NestedField::new(
+                                10,
+                                "contains_null",
+                                Type::Primitive(PrimitiveType::Boolean),
+                                true,
+                            )),
+                            Arc::new(NestedField::new(
+                                11,
+                                "contains_nan",
+                                Type::Primitive(PrimitiveType::Boolean),
+                                false,
+                            )),
+                            Arc::new(NestedField::new(
+                                12,
+                                "lower_bound",
+                                Type::Primitive(PrimitiveType::String),
+                                false,
+                            )),
+                            Arc::new(NestedField::new(
+                                13,
+                                "upper_bound",
+                                Type::Primitive(PrimitiveType::String),
+                                false,
+                            )),
+                        ])),
+                        true,
+                    )),
+                }),
+                true,
             ),
-        ])
+        ];
+
+        crate::spec::Schema::builder()
+            .with_fields(fields.into_iter().map(|f| f.into()))
+            .build()
+            .unwrap()
     }
 
     /// Scans the manifests table.
     pub async fn scan(&self) -> Result<RecordBatch> {
-        let mut content = PrimitiveBuilder::<Int8Type>::new();
+        let schema = schema_to_arrow_schema(&self.schema())?;
+        let partition_summary_fields = if let DataType::List(list_type) =
+            schema.field_with_name("partition_summaries")?.data_type()
+        {
+            if let DataType::Struct(fields) = list_type.data_type() {
+                fields.to_vec()
+            } else {
+                unreachable!()
+            }
+        } else {
+            unreachable!()
+        };

Review Comment:
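   This is very ugly 😲 — the nested `if let … else { unreachable!() }` used to pull the partition summary fields out of the Arrow schema is hard to read.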



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to