barronw commented on code in PR #76:
URL: https://github.com/apache/iceberg-rust/pull/76#discussion_r1363755844


##########
crates/iceberg/src/avro/schema.rs:
##########
@@ -96,7 +98,13 @@ impl SchemaVisitor for SchemaToAvroSchema {
         _struct: &StructType,
         results: Vec<AvroSchemaOrField>,
     ) -> Result<AvroSchemaOrField> {
-        let avro_fields = results.into_iter().map(|r| 
r.unwrap_right()).collect();
+        let avro_fields: Vec<AvroRecordField> =
+            results.into_iter().map(|r| r.unwrap_right()).collect();
+
+        let mut lookup = BTreeMap::new();

Review Comment:
   Updated



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -49,77 +57,128 @@ impl ManifestList {
         partition_type: &StructType,
     ) -> Result<ManifestList, Error> {
         match version {
-            FormatVersion::V2 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v2_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
-                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
-                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
-            }
             FormatVersion::V1 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v1_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V1, bs)?;
                 let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
                 
from_value::<_serde::ManifestListV1>(&values)?.try_into(partition_type)
             }
+            FormatVersion::V2 => {
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V2, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
+                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
+            }
         }
     }
 
     /// Get the entries in the manifest list.
     pub fn entries(&self) -> &[ManifestListEntry] {
         &self.entries
     }
+}
 
-    /// Get the v2 schema of the manifest list entry.
-    pub(crate) fn v2_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::CONTENT.clone(),
-            _const_fields::SEQUENCE_NUMBER.clone(),
-            _const_fields::MIN_SEQUENCE_NUMBER.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V2.clone(),
-            _const_fields::EXISTING_FILES_COUNT_V2.clone(),
-            _const_fields::DELETED_FILES_COUNT_V2.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V2.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V2.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V2.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+/// A manifest list writer.
+pub struct ManifestListWriter {
+    format_version: FormatVersion,
+    output_file: OutputFile,
+    avro_writer: Writer<'static, Vec<u8>>,
+}
+
+impl ManifestListWriter {
+    /// Construct a v1 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v1(output_file: OutputFile, snapshot_id: i64, parent_snapshot_id: 
i64) -> Self {
+        let mut metadata = HashMap::new();

Review Comment:
   Updated



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -49,77 +57,128 @@ impl ManifestList {
         partition_type: &StructType,
     ) -> Result<ManifestList, Error> {
         match version {
-            FormatVersion::V2 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v2_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
-                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
-                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
-            }
             FormatVersion::V1 => {
-                let schema = schema_to_avro_schema("manifest_list", 
&Self::v1_schema()).unwrap();
-                let reader = Reader::with_schema(&schema, bs)?;
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V1, bs)?;
                 let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
                 
from_value::<_serde::ManifestListV1>(&values)?.try_into(partition_type)
             }
+            FormatVersion::V2 => {
+                let reader = 
Reader::with_schema(&MANIFEST_LIST_AVRO_SCHEMA_V2, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, 
_>>()?);
+                
from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
+            }
         }
     }
 
     /// Get the entries in the manifest list.
     pub fn entries(&self) -> &[ManifestListEntry] {
         &self.entries
     }
+}
 
-    /// Get the v2 schema of the manifest list entry.
-    pub(crate) fn v2_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::CONTENT.clone(),
-            _const_fields::SEQUENCE_NUMBER.clone(),
-            _const_fields::MIN_SEQUENCE_NUMBER.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V2.clone(),
-            _const_fields::EXISTING_FILES_COUNT_V2.clone(),
-            _const_fields::DELETED_FILES_COUNT_V2.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V2.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V2.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V2.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+/// A manifest list writer.
+pub struct ManifestListWriter {
+    format_version: FormatVersion,
+    output_file: OutputFile,
+    avro_writer: Writer<'static, Vec<u8>>,
+}
+
+impl ManifestListWriter {
+    /// Construct a v1 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v1(output_file: OutputFile, snapshot_id: i64, parent_snapshot_id: 
i64) -> Self {
+        let mut metadata = HashMap::new();
+        metadata.insert("snapshot-id".to_string(), snapshot_id.to_string());
+        metadata.insert(
+            "parent-snapshot-id".to_string(),
+            parent_snapshot_id.to_string(),
+        );
+        metadata.insert("format-version".to_string(), "1".to_string());
+        Self::new(FormatVersion::V1, output_file, metadata)
     }
 
-    /// Get the v1 schema of the manifest list entry.
-    pub(crate) fn v1_schema() -> Schema {
-        let fields = vec![
-            _const_fields::MANIFEST_PATH.clone(),
-            _const_fields::MANIFEST_LENGTH.clone(),
-            _const_fields::PARTITION_SPEC_ID.clone(),
-            _const_fields::ADDED_SNAPSHOT_ID.clone(),
-            _const_fields::ADDED_FILES_COUNT_V1.clone().to_owned(),
-            _const_fields::EXISTING_FILES_COUNT_V1.clone(),
-            _const_fields::DELETED_FILES_COUNT_V1.clone(),
-            _const_fields::ADDED_ROWS_COUNT_V1.clone(),
-            _const_fields::EXISTING_ROWS_COUNT_V1.clone(),
-            _const_fields::DELETED_ROWS_COUNT_V1.clone(),
-            _const_fields::PARTITIONS.clone(),
-            _const_fields::KEY_METADATA.clone(),
-        ];
-        Schema::builder().with_fields(fields).build().unwrap()
+    /// Construct a v2 [`ManifestListWriter`] that writes to a provided 
[`OutputFile`].
+    pub fn v2(
+        output_file: OutputFile,
+        snapshot_id: i64,
+        parent_snapshot_id: i64,
+        sequence_number: i64,
+    ) -> Self {
+        let mut metadata = HashMap::new();
+        metadata.insert("snapshot-id".to_string(), snapshot_id.to_string());
+        metadata.insert(
+            "parent-snapshot-id".to_string(),
+            parent_snapshot_id.to_string(),
+        );
+        metadata.insert("sequence-number".to_string(), 
sequence_number.to_string());
+        metadata.insert("format-version".to_string(), "2".to_string());
+        Self::new(FormatVersion::V2, output_file, metadata)
+    }
+
+    fn new(
+        format_version: FormatVersion,
+        output_file: OutputFile,
+        metadata: HashMap<String, String>,
+    ) -> Self {
+        let avro_schema = match format_version {
+            FormatVersion::V1 => &MANIFEST_LIST_AVRO_SCHEMA_V1,
+            FormatVersion::V2 => &MANIFEST_LIST_AVRO_SCHEMA_V2,
+        };
+        let mut avro_writer = Writer::new(avro_schema, Vec::new());
+        for (key, value) in metadata {
+            avro_writer.add_user_metadata(key, value).unwrap();

Review Comment:
   Updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to