Xuanwo commented on code in PR #56:
URL: https://github.com/apache/iceberg-rust/pull/56#discussion_r1333840579


##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -0,0 +1,943 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! ManifestList for Iceberg.
+
+use crate::{avro::schema_to_avro_schema, spec::Literal, Error};
+use apache_avro::{from_value, types::Value, Reader};
+
+use super::{FormatVersion, Schema, StructType};
+
+/// Snapshots are embedded in table metadata, but the list of manifests for a
+/// snapshot are stored in a separate manifest list file.
+///
+/// A new manifest list is written for each attempt to commit a snapshot
+/// because the list of manifests always changes to produce a new snapshot.
+/// When a manifest list is written, the (optimistic) sequence number of the
+/// snapshot is written for all new manifest files tracked by the list.
+///
+/// A manifest list includes summary metadata that can be used to avoid
+/// scanning all of the manifests in a snapshot when planning a table scan.
+/// This includes the number of added, existing, and deleted files, and a
+/// summary of values for each field of the partition spec used to write the
+/// manifest.
+#[derive(Debug, Clone)]
+pub struct ManifestList {
+    /// Entries in a manifest list.
+    entries: Vec<ManifestListEntry>,
+}
+
+impl ManifestList {
+    /// Parse manifest list from bytes.
+    pub fn parse_with_version(
+        bs: &[u8],
+        version: FormatVersion,
+        partition_type: &StructType,
+    ) -> Result<ManifestList, Error> {
+        match version {
+            FormatVersion::V2 => {
+                let schema = schema_to_avro_schema("manifest_list", &Self::v2_schema()).unwrap();
+                let reader = Reader::with_schema(&schema, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, _>>()?);
+                from_value::<_serde::ManifestListV2>(&values)?.try_into(partition_type)
+            }
+            FormatVersion::V1 => {
+                let schema = schema_to_avro_schema("manifest_list", &Self::v1_schema()).unwrap();
+                let reader = Reader::with_schema(&schema, bs)?;
+                let values = Value::Array(reader.collect::<Result<Vec<Value>, _>>()?);
+                from_value::<_serde::ManifestListV1>(&values)?.try_into(partition_type)
+            }
+        }
+    }
+
+    /// Get the entries in the manifest list.
+    pub fn entries(&self) -> &[ManifestListEntry] {
+        &self.entries
+    }
+
+    /// Get the v2 schema of the manifest list entry.
+    pub(crate) fn v2_schema() -> Schema {
+        let fields = vec![
+            _schema::MANIFEST_PATH.clone(),
+            _schema::MANIFEST_LENGTH.clone(),
+            _schema::PARTITION_SPEC_ID.clone(),
+            _schema::CONTENT.clone(),
+            _schema::SEQUENCE_NUMBER.clone(),
+            _schema::MIN_SEQUENCE_NUMBER.clone(),
+            _schema::ADDED_SNAPSHOT_ID.clone(),
+            _schema::ADDED_FILES_COUNT_V2.clone(),
+            _schema::EXISTING_FILES_COUNT_V2.clone(),
+            _schema::DELETED_FILES_COUNT_V2.clone(),
+            _schema::ADDED_ROWS_COUNT_V2.clone(),
+            _schema::EXISTING_ROWS_COUNT_V2.clone(),
+            _schema::DELETED_ROWS_COUNT_V2.clone(),
+            _schema::PARTITIONS.clone(),
+            _schema::KEY_METADATA.clone(),
+        ];
+        Schema::builder().with_fields(fields).build().unwrap()
+    }
+
+    /// Get the v1 schema of the manifest list entry.
+    pub(crate) fn v1_schema() -> Schema {
+        let fields = vec![
+            _schema::MANIFEST_PATH.clone(),
+            _schema::MANIFEST_LENGTH.clone(),
+            _schema::PARTITION_SPEC_ID.clone(),
+            _schema::ADDED_SNAPSHOT_ID.clone(),
+            _schema::ADDED_FILES_COUNT_V1.clone().to_owned(),
+            _schema::EXISTING_FILES_COUNT_V1.clone(),
+            _schema::DELETED_FILES_COUNT_V1.clone(),
+            _schema::ADDED_ROWS_COUNT_V1.clone(),
+            _schema::EXISTING_ROWS_COUNT_V1.clone(),
+            _schema::DELETED_ROWS_COUNT_V1.clone(),
+            _schema::PARTITIONS.clone(),
+            _schema::KEY_METADATA.clone(),
+        ];
+        Schema::builder().with_fields(fields).build().unwrap()
+    }
+}
+
+/// This is a helper module that defines the schema field of the manifest list entry.
+mod _schema {

Review Comment:
   The naming convention `_schema` seems a bit strange to me. What about using 
`fields` or `constant_fields` instead?



##########
crates/iceberg/testdata/simple_manifest_list_v1.avro:
##########


Review Comment:
   By default, ASF releases do not allow binary files. Should we generate these 
files or exclude them from the release? 
   
   cc @Fokko for comments as you are likely to be our first release manager.



##########
crates/iceberg/src/avro/mod.rs:
##########
@@ -18,3 +18,4 @@
 //! Avro related codes.
 #[allow(dead_code)]

Review Comment:
   Do we still need to allow dead code here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to