Fokko commented on code in PR #118: URL: https://github.com/apache/iceberg-rust/pull/118#discussion_r1426366787
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -1311,511 +1311,452 @@ mod tests { use crate::spec::Type; use std::sync::Arc; - #[test] - fn test_parse_manifest_v2_unpartition() { - let path = format!( - "{}/testdata/unpartition_manifest_v2.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - // test metadata - assert!(manifest.metadata.schema_id == 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 2, - "v_int", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 3, - "v_long", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 4, - "v_float", - Type::Primitive(PrimitiveType::Float), - )), - Arc::new(NestedField::optional( - 5, - "v_double", - Type::Primitive(PrimitiveType::Double), - )), - Arc::new(NestedField::optional( - 6, - "v_varchar", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 7, - "v_bool", - Type::Primitive(PrimitiveType::Boolean), - )), - Arc::new(NestedField::optional( - 8, - "v_date", - Type::Primitive(PrimitiveType::Date), - )), - Arc::new(NestedField::optional( - 9, - "v_timestamp", - Type::Primitive(PrimitiveType::Timestamptz), - )), - Arc::new(NestedField::optional( - 10, - "v_decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 36, - scale: 10, - }), - )), - Arc::new(NestedField::optional( - 11, - "v_ts_ntz", - Type::Primitive(PrimitiveType::Timestamp), - )), - ]; - Schema::builder().with_fields(fields).build().unwrap() - }); - assert!(manifest.metadata.partition_spec.fields.is_empty()); - assert!(manifest.metadata.content == ManifestContentType::Data); - 
assert!(manifest.metadata.format_version == FormatVersion::V2); - // test entries - assert!(manifest.entries.len() == 1); - let entry = &manifest.entries[0]; - assert!(entry.status == ManifestStatus::Added); - assert!(entry.snapshot_id == Some(0)); - assert!(entry.sequence_number == Some(1)); - assert!(entry.file_sequence_number == Some(1)); - assert_eq!( - entry.data_file, - DataFile { - content: DataContentType::Data, - file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(), - file_format: DataFileFormat::Parquet, - partition: Struct::empty(), - record_count: 1, - file_size_in_bytes: 5442, - column_sizes: HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]), - value_counts: HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]), - null_value_counts: HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]), - nan_value_counts: HashMap::new(), - lower_bounds: HashMap::new(), - upper_bounds: HashMap::new(), - key_metadata: Vec::new(), - split_offsets: vec![4], - equality_ids: Vec::new(), - sort_order_id: None, - } - ); + #[tokio::test] + async fn test_parse_manifest_v2_unpartition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 0, + schema: Schema::builder() + .with_fields(vec![ + // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "v_int", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 3, + "v_long", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 4, + "v_float", + Type::Primitive(PrimitiveType::Float), + )), + Arc::new(NestedField::optional( + 5, + "v_double", + Type::Primitive(PrimitiveType::Double), + )), + Arc::new(NestedField::optional( + 
6, + "v_varchar", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 7, + "v_bool", + Type::Primitive(PrimitiveType::Boolean), + )), + Arc::new(NestedField::optional( + 8, + "v_date", + Type::Primitive(PrimitiveType::Date), + )), + Arc::new(NestedField::optional( + 9, + "v_timestamp", + Type::Primitive(PrimitiveType::Timestamptz), + )), + Arc::new(NestedField::optional( + 10, + "v_decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 36, + scale: 10, + }), + )), + Arc::new(NestedField::optional( + 11, + "v_ts_ntz", + Type::Primitive(PrimitiveType::Timestamp), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![], + }, + content: ManifestContentType::Data, + format_version: FormatVersion::V2, + }, + entries: vec![ + ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: Some(0), + sequence_number: Some(1), + file_sequence_number: Some(1), Review Comment: Typically, when a new manifest-entry is being added, the snapshot-id and sequence-numbers are not set and are instead inherited from the snapshot itself. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org