Sl1mb0 commented on issue #706:
URL: https://github.com/apache/iceberg-rust/issues/706#issuecomment-2488967779

   Sure!
   
   ```rust
   mod tests {
       use std::collections::HashMap;
       use std::io::Seek;
       use std::sync::Arc;
   
       use arrow::{
           array::Float16BufferBuilder, buffer, compute::kernels::partition, 
ipc::FieldBuilder,
       };
       use flatbuffers::FlatBufferBuilder;
       use iceberg::spec::{
           DataFile, DataFileBuilder, Datum, FormatVersion, Literal, 
ManifestStatus, NestedField,
           PartitionField, PartitionSpec, PrimitiveLiteral, PrimitiveType, 
Struct, Transform, Type,
       };
       use iceberg::transform;
   
       use super::*;
   
       #[tokio::test]
       async fn test_manifest_builder() {
           // Create a schema with a single nested field.
           let schema_fields = (0..4).map(|i| {
               Arc::new(NestedField {
                   id: i,
                   name: format!("field_{}", i),
                   required: true,
                   field_type: 
Box::new(Type::Primitive(PrimitiveType::Boolean)),
                   doc: None,
                   initial_default: None,
                   write_default: None,
               })
           });
   
           let schema = IcebergSchema::builder()
               .with_fields(schema_fields)
               .with_schema_id(0)
               .build()
               .unwrap();
   
           // Get the schema id, and create an arbitrary snapshot id.
           let schema_id = schema.schema_id();
           let snapshot_id = 0
   
           // Create a partition spec with a single partition field.
           let partition_spec = PartitionSpec::builder(&schema)
               .with_spec_id(0)
               .build()
               .unwrap();
   
           // Create the manifest metadata with `schema` and `partition_spec`.
           let metadata = ManifestMetadata::builder()
               .schema(schema)
               .schema_id(schema_id)
               .content(iceberg::spec::ManifestContentType::Data)
               .partition_spec(partition_spec)
               .format_version(FormatVersion::V2)
               .build();
   
           let partition = Struct::from_iter(vec![Some(Literal::Primitive(
               PrimitiveLiteral::Boolean(true),
           ))]);
   
           let mut column_sizes = HashMap::new();
           column_sizes.insert(0i32, 4u64);
   
           let mut lower_bounds = HashMap::new();
           lower_bounds.insert(0i32, Datum::bool(false));
   
           let mut upper_bounds = HashMap::new();
           upper_bounds.insert(1i32, Datum::bool(true));
   
           let tmp_dir = TempDir::new().unwrap();
           let file_path = tmp_dir.path().join("data.parquet");
   
           // Create an arbitrary data file.
           let data_file = DataFileBuilder::default()
               .content(iceberg::spec::DataContentType::Data)
               .file_path(file_path.to_str().unwrap().to_string())
               .file_format(iceberg::spec::DataFileFormat::Parquet)
               .column_sizes(column_sizes)
               .partition(partition)
               .equality_ids(vec![0, 1, 2, 3])
               .record_count(0)
               .file_size_in_bytes(0)
               .key_metadata(Vec::new())
               .lower_bounds(lower_bounds)
               .upper_bounds(upper_bounds)
               .build()
               .unwrap();
   
           // Create an arbitrary manifest entry with `snapshot_id` and 
`data_file`.
           let manifest_entry = ManifestEntry::builder()
               .status(ManifestStatus::Added)
               .snapshot_id(snapshot_id)
               .data_file(data_file)
               .build();
   
           // Create new `Manifest`
           let input_manifest = Manifest::new(metadata, vec![manifest_entry]);
   
           // Write the manifest to an avro file.
           let temp_dir = 
TempDir::new().unwrap().into_path().join("manifest.avro");
           let output_file = FileIOBuilder::new_fs_io()
               .build()
               .unwrap()
               .new_output(temp_dir.to_str().unwrap())
               .unwrap();
   
           let manifest_writer = ManifestWriter::new(output_file, snapshot_id, 
Vec::new());
           let manifest_list_entry = 
manifest_writer.write(input_manifest.clone()).await.unwrap();
   
           // Assert that the manifest path exists.
           let manifest_path = PathBuf::from(manifest_list_entry.manifest_path);
           assert!(manifest_path.exists());
   
           // Assert that manifest file can be parsed back into a `Manifest` 
struct.
           let buf = std::fs::read(&manifest_path).unwrap();
           let output_manifest = Manifest::parse_avro(&buf).unwrap();
   
           assert_manifest_eq(input_manifest, output_manifest);
       }
   
       /// Validates that two `Manifest`s are equal.
       ///
       /// The partial_eq implementation for `Manifest` seems to compare some 
fields structurally
       /// e
       fn assert_manifest_eq(m1: Manifest, m2: Manifest) {
           let (entries1, metadata1) = m1.into_parts();
           let (entries2, metadata2) = m2.into_parts();
           assert_eq!(entries1.len(), entries2.len());
           for (e1, e2) in entries1.into_iter().zip(entries2.into_iter()) {
               assert_manifest_entry_eq(&e1, &e2);
           }
           assert_eq!(metadata1, metadata2);
       }
   
       /// Validates that two `ManifestEntry`s are equal.
       ///
       /// Can't use the partial_eq implementation for `ManifestEntry` because 
it
       /// seems to have a bug in the equality check for partition.
       fn assert_manifest_entry_eq(e1: &ManifestEntry, e2: &ManifestEntry) {
           assert_eq!(e1.content_type(), e2.content_type());
           assert_eq!(e1.sequence_number(), e2.sequence_number());
           let file1 = e1.data_file();
           let file2 = e2.data_file();
           assert_eq!(file1.content_type(), file2.content_type());
           assert_eq!(file1.file_path(), file2.file_path());
           assert_eq!(file1.file_format(), file2.file_format());
           // seems to have a bug in the equality check for partition;
           assert_eq!(file1.partition(), file2.partition());
           assert_eq!(file1.record_count(), file2.record_count());
           assert_eq!(file1.file_size_in_bytes(), file2.file_size_in_bytes());
           assert_eq!(file1.column_sizes(), file2.column_sizes());
           assert_eq!(file1.value_counts(), file2.value_counts());
           assert_eq!(file1.null_value_counts(), file2.null_value_counts());
           assert_eq!(file1.nan_value_counts(), file2.nan_value_counts());
           assert_eq!(file1.lower_bounds(), file2.lower_bounds());
           assert_eq!(file1.upper_bounds(), file2.upper_bounds());
           assert_eq!(file1.key_metadata(), file2.key_metadata());
           assert_eq!(file1.split_offsets(), file2.split_offsets());
           assert_eq!(file1.equality_ids(), file2.equality_ids());
           assert_eq!(file1.sort_order_id(), file2.sort_order_id());
       }
   }
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to