liurenjie1024 commented on code in PR #118: URL: https://github.com/apache/iceberg-rust/pull/118#discussion_r1426434871
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -1311,511 +1311,452 @@ mod tests { use crate::spec::Type; use std::sync::Arc; - #[test] - fn test_parse_manifest_v2_unpartition() { - let path = format!( - "{}/testdata/unpartition_manifest_v2.avro", - env!("CARGO_MANIFEST_DIR") - ); - let bs = fs::read(path).expect("read_file must succeed"); - let manifest = Manifest::parse_avro(bs.as_slice()).unwrap(); - // test metadata - assert!(manifest.metadata.schema_id == 0); - assert_eq!(manifest.metadata.schema, { - let fields = vec![ - // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz - Arc::new(NestedField::optional( - 1, - "id", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 2, - "v_int", - Type::Primitive(PrimitiveType::Int), - )), - Arc::new(NestedField::optional( - 3, - "v_long", - Type::Primitive(PrimitiveType::Long), - )), - Arc::new(NestedField::optional( - 4, - "v_float", - Type::Primitive(PrimitiveType::Float), - )), - Arc::new(NestedField::optional( - 5, - "v_double", - Type::Primitive(PrimitiveType::Double), - )), - Arc::new(NestedField::optional( - 6, - "v_varchar", - Type::Primitive(PrimitiveType::String), - )), - Arc::new(NestedField::optional( - 7, - "v_bool", - Type::Primitive(PrimitiveType::Boolean), - )), - Arc::new(NestedField::optional( - 8, - "v_date", - Type::Primitive(PrimitiveType::Date), - )), - Arc::new(NestedField::optional( - 9, - "v_timestamp", - Type::Primitive(PrimitiveType::Timestamptz), - )), - Arc::new(NestedField::optional( - 10, - "v_decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 36, - scale: 10, - }), - )), - Arc::new(NestedField::optional( - 11, - "v_ts_ntz", - Type::Primitive(PrimitiveType::Timestamp), - )), - ]; - Schema::builder().with_fields(fields).build().unwrap() - }); - assert!(manifest.metadata.partition_spec.fields.is_empty()); - assert!(manifest.metadata.content == ManifestContentType::Data); - 
assert!(manifest.metadata.format_version == FormatVersion::V2); - // test entries - assert!(manifest.entries.len() == 1); - let entry = &manifest.entries[0]; - assert!(entry.status == ManifestStatus::Added); - assert!(entry.snapshot_id == Some(0)); - assert!(entry.sequence_number == Some(1)); - assert!(entry.file_sequence_number == Some(1)); - assert_eq!( - entry.data_file, - DataFile { - content: DataContentType::Data, - file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(), - file_format: DataFileFormat::Parquet, - partition: Struct::empty(), - record_count: 1, - file_size_in_bytes: 5442, - column_sizes: HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]), - value_counts: HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]), - null_value_counts: HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]), - nan_value_counts: HashMap::new(), - lower_bounds: HashMap::new(), - upper_bounds: HashMap::new(), - key_metadata: Vec::new(), - split_offsets: vec![4], - equality_ids: Vec::new(), - sort_order_id: None, - } - ); + #[tokio::test] + async fn test_parse_manifest_v2_unpartition() { + let manifest = Manifest { + metadata: ManifestMetadata { + schema_id: 0, + schema: Schema::builder() + .with_fields(vec![ + // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz + Arc::new(NestedField::optional( + 1, + "id", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 2, + "v_int", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 3, + "v_long", + Type::Primitive(PrimitiveType::Long), + )), + Arc::new(NestedField::optional( + 4, + "v_float", + Type::Primitive(PrimitiveType::Float), + )), + Arc::new(NestedField::optional( + 5, + "v_double", + Type::Primitive(PrimitiveType::Double), + )), + Arc::new(NestedField::optional( + 
6, + "v_varchar", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 7, + "v_bool", + Type::Primitive(PrimitiveType::Boolean), + )), + Arc::new(NestedField::optional( + 8, + "v_date", + Type::Primitive(PrimitiveType::Date), + )), + Arc::new(NestedField::optional( + 9, + "v_timestamp", + Type::Primitive(PrimitiveType::Timestamptz), + )), + Arc::new(NestedField::optional( + 10, + "v_decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 36, + scale: 10, + }), + )), + Arc::new(NestedField::optional( + 11, + "v_ts_ntz", + Type::Primitive(PrimitiveType::Timestamp), + )), + ]) + .build() + .unwrap(), + partition_spec: PartitionSpec { + spec_id: 0, + fields: vec![], + }, + content: ManifestContentType::Data, + format_version: FormatVersion::V2, + }, + entries: vec![ + ManifestEntry { + status: ManifestStatus::Added, + snapshot_id: Some(0), + sequence_number: Some(1), + file_sequence_number: Some(1), Review Comment: Thanks for checking. In v2 by default it's `None`, and in v1 it should be `0`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org