liurenjie1024 commented on code in PR #118:
URL: https://github.com/apache/iceberg-rust/pull/118#discussion_r1426434871
##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -1311,511 +1311,452 @@ mod tests {
use crate::spec::Type;
use std::sync::Arc;
- #[test]
- fn test_parse_manifest_v2_unpartition() {
- let path = format!(
- "{}/testdata/unpartition_manifest_v2.avro",
- env!("CARGO_MANIFEST_DIR")
- );
- let bs = fs::read(path).expect("read_file must succeed");
- let manifest = Manifest::parse_avro(bs.as_slice()).unwrap();
- // test metadata
- assert!(manifest.metadata.schema_id == 0);
- assert_eq!(manifest.metadata.schema, {
- let fields = vec![
- // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz
- Arc::new(NestedField::optional(
- 1,
- "id",
- Type::Primitive(PrimitiveType::Long),
- )),
- Arc::new(NestedField::optional(
- 2,
- "v_int",
- Type::Primitive(PrimitiveType::Int),
- )),
- Arc::new(NestedField::optional(
- 3,
- "v_long",
- Type::Primitive(PrimitiveType::Long),
- )),
- Arc::new(NestedField::optional(
- 4,
- "v_float",
- Type::Primitive(PrimitiveType::Float),
- )),
- Arc::new(NestedField::optional(
- 5,
- "v_double",
- Type::Primitive(PrimitiveType::Double),
- )),
- Arc::new(NestedField::optional(
- 6,
- "v_varchar",
- Type::Primitive(PrimitiveType::String),
- )),
- Arc::new(NestedField::optional(
- 7,
- "v_bool",
- Type::Primitive(PrimitiveType::Boolean),
- )),
- Arc::new(NestedField::optional(
- 8,
- "v_date",
- Type::Primitive(PrimitiveType::Date),
- )),
- Arc::new(NestedField::optional(
- 9,
- "v_timestamp",
- Type::Primitive(PrimitiveType::Timestamptz),
- )),
- Arc::new(NestedField::optional(
- 10,
- "v_decimal",
- Type::Primitive(PrimitiveType::Decimal {
- precision: 36,
- scale: 10,
- }),
- )),
- Arc::new(NestedField::optional(
- 11,
- "v_ts_ntz",
- Type::Primitive(PrimitiveType::Timestamp),
- )),
- ];
- Schema::builder().with_fields(fields).build().unwrap()
- });
- assert!(manifest.metadata.partition_spec.fields.is_empty());
- assert!(manifest.metadata.content == ManifestContentType::Data);
- assert!(manifest.metadata.format_version == FormatVersion::V2);
- // test entries
- assert!(manifest.entries.len() == 1);
- let entry = &manifest.entries[0];
- assert!(entry.status == ManifestStatus::Added);
- assert!(entry.snapshot_id == Some(0));
- assert!(entry.sequence_number == Some(1));
- assert!(entry.file_sequence_number == Some(1));
- assert_eq!(
- entry.data_file,
- DataFile {
- content: DataContentType::Data,
- file_path: "s3a://icebergdata/demo/s1/t1/data/00000-0-ba56fbfa-f2ff-40c9-bb27-565ad6dc2be8-00000.parquet".to_string(),
- file_format: DataFileFormat::Parquet,
- partition: Struct::empty(),
- record_count: 1,
- file_size_in_bytes: 5442,
- column_sizes: HashMap::from([(0,73),(6,34),(2,73),(7,61),(3,61),(5,62),(9,79),(10,73),(1,61),(4,73),(8,73)]),
- value_counts: HashMap::from([(4,1),(5,1),(2,1),(0,1),(3,1),(6,1),(8,1),(1,1),(10,1),(7,1),(9,1)]),
- null_value_counts: HashMap::from([(1,0),(6,0),(2,0),(8,0),(0,0),(3,0),(5,0),(9,0),(7,0),(4,0),(10,0)]),
- nan_value_counts: HashMap::new(),
- lower_bounds: HashMap::new(),
- upper_bounds: HashMap::new(),
- key_metadata: Vec::new(),
- split_offsets: vec![4],
- equality_ids: Vec::new(),
- sort_order_id: None,
- }
- );
+ #[tokio::test]
+ async fn test_parse_manifest_v2_unpartition() {
+ let manifest = Manifest {
+ metadata: ManifestMetadata {
+ schema_id: 0,
+ schema: Schema::builder()
+ .with_fields(vec![
+ // id v_int v_long v_float v_double v_varchar v_bool v_date v_timestamp v_decimal v_ts_ntz
+ Arc::new(NestedField::optional(
+ 1,
+ "id",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ Arc::new(NestedField::optional(
+ 2,
+ "v_int",
+ Type::Primitive(PrimitiveType::Int),
+ )),
+ Arc::new(NestedField::optional(
+ 3,
+ "v_long",
+ Type::Primitive(PrimitiveType::Long),
+ )),
+ Arc::new(NestedField::optional(
+ 4,
+ "v_float",
+ Type::Primitive(PrimitiveType::Float),
+ )),
+ Arc::new(NestedField::optional(
+ 5,
+ "v_double",
+ Type::Primitive(PrimitiveType::Double),
+ )),
+ Arc::new(NestedField::optional(
+ 6,
+ "v_varchar",
+ Type::Primitive(PrimitiveType::String),
+ )),
+ Arc::new(NestedField::optional(
+ 7,
+ "v_bool",
+ Type::Primitive(PrimitiveType::Boolean),
+ )),
+ Arc::new(NestedField::optional(
+ 8,
+ "v_date",
+ Type::Primitive(PrimitiveType::Date),
+ )),
+ Arc::new(NestedField::optional(
+ 9,
+ "v_timestamp",
+ Type::Primitive(PrimitiveType::Timestamptz),
+ )),
+ Arc::new(NestedField::optional(
+ 10,
+ "v_decimal",
+ Type::Primitive(PrimitiveType::Decimal {
+ precision: 36,
+ scale: 10,
+ }),
+ )),
+ Arc::new(NestedField::optional(
+ 11,
+ "v_ts_ntz",
+ Type::Primitive(PrimitiveType::Timestamp),
+ )),
+ ])
+ .build()
+ .unwrap(),
+ partition_spec: PartitionSpec {
+ spec_id: 0,
+ fields: vec![],
+ },
+ content: ManifestContentType::Data,
+ format_version: FormatVersion::V2,
+ },
+ entries: vec![
+ ManifestEntry {
+ status: ManifestStatus::Added,
+ snapshot_id: Some(0),
+ sequence_number: Some(1),
+ file_sequence_number: Some(1),
Review Comment:
Thanks for checking. In v2 it is `None` by default, and in v1 it should be `0`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]