Xuanwo commented on code in PR #76:
URL: https://github.com/apache/iceberg-rust/pull/76#discussion_r1363171863


##########
crates/iceberg/src/avro/schema.rs:
##########
@@ -96,7 +98,13 @@ impl SchemaVisitor for SchemaToAvroSchema {
         _struct: &StructType,
         results: Vec<AvroSchemaOrField>,
     ) -> Result<AvroSchemaOrField> {
-        let avro_fields = results.into_iter().map(|r| 
r.unwrap_right()).collect();
+        let avro_fields: Vec<AvroRecordField> =
+            results.into_iter().map(|r| r.unwrap_right()).collect();
+
+        let mut lookup = BTreeMap::new();

Review Comment:
   How about using `BTreeMap::from_iter()` instead of for-loop insert? 
`from_iter` uses bulk build internally which has better performance.



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -49,77 +57,128 @@ impl ManifestList {
         partition_type: &StructType,
     ) -> Result<ManifestList, Error> {
         match version {
-            FormatVersion::V2 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v2_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
-                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
-                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
-            }
             FormatVersion::V1 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v1_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V1, bs)?;
                 let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
                 
from_value::<_serde::ManifestListV1>(&values)?.try_into(partition_type)
             }
+            FormatVersion::V2 => {
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V2, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
+                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
+            }
         }
     }
 
     /// Get the entries in the manifest list.
     pub fn entries(&self) -> &[ManifestListEntry] {
         &self.entries
     }
+}
 
-    /// Get the v2 schema of the manifest list entry.
-    pub(crate) fn v2_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::CONTENT.clone(),
-            _const_fields::SEQUENCE_NUMBER.clone(),
-            _const_fields::MIN_SEQUENCE_NUMBER.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V2.clone(),
-            _const_fields::EXISTING_FILES_COUNT_V2.clone(),
-            _const_fields::DELETED_FILES_COUNT_V2.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V2.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V2.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V2.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+/// A manifest list writer.
+pub struct ManifestListWriter {
+    format_version: FormatVersion,
+    output_file: OutputFile,
+    avro_writer: Writer<'static, Vec<u8>>,
+}
+
+impl ManifestListWriter {
+    /// Construct a v1 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v1(output_file: OutputFile, snapshot_id: i64, parent_snapshot_id: 
i64) -> Self {
+        let mut metadata = HashMap::new();

Review Comment:
   How about using `HashMap::from_iter([("snapshot-id".to_string(), 
snapshot_id.to_string()), ...])`?



##########
crates/iceberg/src/avro/schema.rs:
##########
@@ -96,7 +98,13 @@ impl SchemaVisitor for SchemaToAvroSchema {
         _struct: &StructType,

Review Comment:
   Hi, @liurenjie1024, why we need `StructType` here, it seems not been used?



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -49,77 +57,128 @@ impl ManifestList {
         partition_type: &StructType,
     ) -> Result<ManifestList, Error> {
         match version {
-            FormatVersion::V2 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v2_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
-                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
-                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
-            }
             FormatVersion::V1 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v1_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V1, bs)?;
                 let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
                 
from_value::<_serde::ManifestListV1>(&values)?.try_into(partition_type)
             }
+            FormatVersion::V2 => {
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V2, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
+                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
+            }
         }
     }
 
     /// Get the entries in the manifest list.
     pub fn entries(&self) -> &[ManifestListEntry] {
         &self.entries
     }
+}
 
-    /// Get the v2 schema of the manifest list entry.
-    pub(crate) fn v2_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::CONTENT.clone(),
-            _const_fields::SEQUENCE_NUMBER.clone(),
-            _const_fields::MIN_SEQUENCE_NUMBER.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V2.clone(),
-            _const_fields::EXISTING_FILES_COUNT_V2.clone(),
-            _const_fields::DELETED_FILES_COUNT_V2.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V2.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V2.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V2.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+/// A manifest list writer.
+pub struct ManifestListWriter {
+    format_version: FormatVersion,
+    output_file: OutputFile,
+    avro_writer: Writer<'static, Vec<u8>>,
+}
+
+impl ManifestListWriter {
+    /// Construct a v1 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v1(output_file: OutputFile, snapshot_id: i64, parent_snapshot_id: 
i64) -> Self {
+        let mut metadata = HashMap::new();
+        metadata.insert("snapshot-id".to_string(), snapshot_id.to_string());
+        metadata.insert(
+            "parent-snapshot-id".to_string(),
+            parent_snapshot_id.to_string(),
+        );
+        metadata.insert("format-version".to_string(), "1".to_string());
+        Self::new(FormatVersion::V1, output_file, metadata)
     }
 
-    /// Get the v1 schema of the manifest list entry.
-    pub(crate) fn v1_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V1.clone().to_owned(),
-            _const_fields::EXISTING_FILES_COUNT_V1.clone(),
-            _const_fields::DELETED_FILES_COUNT_V1.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V1.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V1.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V1.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+    /// Construct a v2 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v2(
+        output_file: OutputFile,
+        snapshot_id: i64,
+        parent_snapshot_id: i64,
+        sequence_number: i64,
+    ) -> Self {
+        let mut metadata = HashMap::new();
+        metadata.insert("snapshot-id".to_string(), snapshot_id.to_string());
+        metadata.insert(
+            "parent-snapshot-id".to_string(),
+            parent_snapshot_id.to_string(),
+        );
+        metadata.insert("sequence-number".to_string(), 
sequence_number.to_string());
+        metadata.insert("format-version".to_string(), "2".to_string());
+        Self::new(FormatVersion::V2, output_file, metadata)
+    }
+
+    fn new(
+        format_version: FormatVersion,
+        output_file: OutputFile,
+        metadata: HashMap<String, String>,
+    ) -> Self {
+        let avro_schema = match format_version {
+            FormatVersion::V1 => &MANIFEST_LIST_AVRO_SCHEMA_V1,
+            FormatVersion::V2 => &MANIFEST_LIST_AVRO_SCHEMA_V2,
+        };
+        let mut avro_writer = Writer::new(avro_schema, Vec::new());
+        for (key, value) in metadata {
+            avro_writer.add_user_metadata(key, value).unwrap();

Review Comment:
   How about using `expect()` to clarify why we anticipate this operation will 
always succeed?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to