Fokko commented on code in PR #76:
URL: https://github.com/apache/iceberg-rust/pull/76#discussion_r1360636839


##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -940,4 +1025,108 @@ mod test {
             
r#"[{"manifest_path":"s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro","manifest_length":6926,"partition_spec_id":0,"content":0,"sequence_number":1,"min_sequence_number":1,"added_snapshot_id":377075049360453639,"added_data_files_count":1,"existing_data_files_count":0,"deleted_data_files_count":0,"added_rows_count":3,"existing_rows_count":0,"deleted_rows_count":0,"partitions":[{"contains_null":false,"contains_nan":false,"lower_bound":[1,0,0,0,0,0,0,0],"upper_bound":[1,0,0,0,0,0,0,0]}],"key_metadata":null}]"#
         );
     }
+
+    #[tokio::test]
+    async fn test_manifest_list_writer_v1() {
+        let expected_manifest_list = ManifestList {
+            entries: vec![ManifestListEntry {
+                manifest_path: 
"/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
+                manifest_length: 5806,
+                partition_spec_id: 0,
+                content: ManifestContentType::Data,
+                sequence_number: 0,
+                min_sequence_number: 0,
+                added_snapshot_id: 1646658105718557341,
+                added_data_files_count: Some(3),
+                existing_data_files_count: Some(0),
+                deleted_data_files_count: Some(0),
+                added_rows_count: Some(3),
+                existing_rows_count: Some(0),
+                deleted_rows_count: Some(0),
+                partitions: vec![FieldSummary { contains_null: false, 
contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: 
Some(Literal::long(1))}],

Review Comment:
   This is not wrong, but it might be confusing: I see partition information 
here, but `partition_spec_id` is 0, which is reserved for the unpartitioned case.



##########
crates/iceberg/src/spec/manifest_list.rs:
##########
@@ -940,4 +1025,108 @@ mod test {
             
r#"[{"manifest_path":"s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro","manifest_length":6926,"partition_spec_id":0,"content":0,"sequence_number":1,"min_sequence_number":1,"added_snapshot_id":377075049360453639,"added_data_files_count":1,"existing_data_files_count":0,"deleted_data_files_count":0,"added_rows_count":3,"existing_rows_count":0,"deleted_rows_count":0,"partitions":[{"contains_null":false,"contains_nan":false,"lower_bound":[1,0,0,0,0,0,0,0],"upper_bound":[1,0,0,0,0,0,0,0]}],"key_metadata":null}]"#
         );
     }
+
+    #[tokio::test]
+    async fn test_manifest_list_writer_v1() {
+        let expected_manifest_list = ManifestList {
+            entries: vec![ManifestListEntry {
+                manifest_path: 
"/opt/bitnami/spark/warehouse/db/table/metadata/10d28031-9739-484c-92db-cdf2975cead4-m0.avro".to_string(),
+                manifest_length: 5806,
+                partition_spec_id: 0,
+                content: ManifestContentType::Data,
+                sequence_number: 0,
+                min_sequence_number: 0,
+                added_snapshot_id: 1646658105718557341,
+                added_data_files_count: Some(3),
+                existing_data_files_count: Some(0),
+                deleted_data_files_count: Some(0),
+                added_rows_count: Some(3),
+                existing_rows_count: Some(0),
+                deleted_rows_count: Some(0),
+                partitions: vec![FieldSummary { contains_null: false, 
contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: 
Some(Literal::long(1))}],
+                key_metadata: vec![],
+            }]
+        };
+
+        let temp_dir = TempDir::new("manifest_list_v1").unwrap();
+        let path = temp_dir.path().join("manifest_list_v1.avro");
+        let io = FileIOBuilder::new_fs_io().build().unwrap();
+        let output_file = io.new_output(path.to_str().unwrap()).unwrap();
+
+        let mut metadata = HashMap::new();
+        metadata.insert(String::from("format-version"), String::from("1"));
+        let mut writer =
+            ManifestListWriter::new(output_file, 
crate::spec::FormatVersion::V1, metadata);
+        writer
+            
.add_manifest_entries(expected_manifest_list.entries.clone().into_iter())
+            .unwrap();
+        writer.close().await.unwrap();
+
+        let bs = fs::read(path).unwrap();
+        let manifest_list = ManifestList::parse_with_version(
+            &bs,
+            crate::spec::FormatVersion::V1,
+            &StructType::new(vec![Arc::new(NestedField::required(
+                1,
+                "test",
+                Type::Primitive(PrimitiveType::Long),
+            ))]),
+        )
+        .unwrap();
+        assert_eq!(manifest_list, expected_manifest_list);
+
+        temp_dir.close().unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_manifest_list_writer_v2() {
+        let expected_manifest_list = ManifestList {
+            entries: vec![ManifestListEntry {
+                manifest_path: 
"s3a://icebergdata/demo/s1/t1/metadata/05ffe08b-810f-49b3-a8f4-e88fc99b254a-m0.avro".to_string(),
+                manifest_length: 6926,
+                partition_spec_id: 0,
+                content: ManifestContentType::Data,
+                sequence_number: 1,
+                min_sequence_number: 1,
+                added_snapshot_id: 377075049360453639,
+                added_data_files_count: Some(1),
+                existing_data_files_count: Some(0),
+                deleted_data_files_count: Some(0),
+                added_rows_count: Some(3),
+                existing_rows_count: Some(0),
+                deleted_rows_count: Some(0),
+                partitions: vec![FieldSummary { contains_null: false, 
contains_nan: Some(false), lower_bound: Some(Literal::long(1)), upper_bound: 
Some(Literal::long(1))}],
+                key_metadata: vec![],
+            }]
+        };
+
+        let temp_dir = TempDir::new("manifest_list_v2").unwrap();
+        let path = temp_dir.path().join("manifest_list_v2.avro");
+        let io = FileIOBuilder::new_fs_io().build().unwrap();
+        let output_file = io.new_output(path.to_str().unwrap()).unwrap();
+
+        let mut metadata = HashMap::new();
+        metadata.insert(String::from("format-version"), String::from("2"));

Review Comment:
   Since we always want to write this metadata, shouldn't it be part of the 
API? (We also pass in `crate::spec::FormatVersion::V2` below; couldn't we 
derive the `format-version` value from that?)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to