kevinjqliu commented on code in PR #1482: URL: https://github.com/apache/iceberg-rust/pull/1482#discussion_r2188658689
########## crates/iceberg/src/avro/schema.rs: ########## @@ -92,9 +127,12 @@ impl SchemaVisitor for SchemaToAvroSchema { custom_attributes: Default::default(), }; - if !field.required { + if let Some(default) = &field.initial_default { + avro_record_field.default = Some(literal_to_json(default)?); + } else if !field.required { avro_record_field.default = Some(Value::Null); } + Review Comment: wdyt about moving this above the constructor? and set the constructors default ``` let default = if let Some(default) = &field.initial_default { Some(literal_to_json(default)?) } else if !field.required { Some(Value::Null) } else { None }; ``` ########## crates/iceberg/src/spec/manifest/_serde.rs: ########## @@ -398,4 +398,73 @@ mod tests { assert_eq!(data_files, actual_data_file); } + + #[tokio::test] + async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "v1", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 2, + "v2", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 3, + "v3", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), + ); + let data_files = vec![DataFile { Review Comment: both this and the schema above are copied/pasted from the test above, maybe we can consolidate with a helper function ########## crates/iceberg/src/spec/manifest/entry.rs: ########## @@ -232,11 +232,10 @@ static FILE_SEQUENCE_NUMBER: Lazy<NestedFieldRef> = { static CONTENT: Lazy<NestedFieldRef> = { Lazy::new(|| { - Arc::new(NestedField::required( - 134, - "content", - Type::Primitive(PrimitiveType::Int), - )) + Arc::new( + NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int)) + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))), Review Comment: nit: maybe add a comment here about `0` representing `DataContentType::DATA ` ########## crates/iceberg/src/avro/schema.rs: ########## @@ -43,6 +43,41 @@ const MAP_LOGICAL_TYPE: &str = "map"; // This const may better to maintain in avro-rs. const LOGICAL_TYPE: &str = "logicalType"; +fn literal_to_json(literal: &crate::spec::Literal) -> Result<Value> { Review Comment: whats the reason behind serializing to json? ########## crates/iceberg/src/spec/manifest/_serde.rs: ########## @@ -398,4 +398,73 @@ mod tests { assert_eq!(data_files, actual_data_file); } + + #[tokio::test] + async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() { + let schema = Arc::new( + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::optional( + 1, + "v1", + Type::Primitive(PrimitiveType::Int), + )), + Arc::new(NestedField::optional( + 2, + "v2", + Type::Primitive(PrimitiveType::String), + )), + Arc::new(NestedField::optional( + 3, + "v3", + Type::Primitive(PrimitiveType::String), + )), + ]) + .build() + .unwrap(), Review Comment: do u know what this schema is for? i couldnt find how its used. and why is v1 int but v2/v3 string? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org