liurenjie1024 commented on code in PR #797: URL: https://github.com/apache/iceberg-rust/pull/797#discussion_r1899889761
########## crates/iceberg/src/spec/manifest.rs: ########## @@ -656,6 +656,38 @@ mod _const_schema { }) }; + fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> { Review Comment: ```suggestion fn data_file_fields_v2(partition_type: &StructType) -> Vec<NestedFieldRef> { ``` ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -656,6 +656,38 @@ mod _const_schema { }) }; + fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> { + vec![ + CONTENT.clone(), + FILE_PATH.clone(), + FILE_FORMAT.clone(), + Arc::new(NestedField::required( + 102, + "partition", + Type::Struct(partition_type), + )), + RECORD_COUNT.clone(), + FILE_SIZE_IN_BYTES.clone(), + COLUMN_SIZES.clone(), + VALUE_COUNTS.clone(), + NULL_VALUE_COUNTS.clone(), + NAN_VALUE_COUNTS.clone(), + LOWER_BOUNDS.clone(), + UPPER_BOUNDS.clone(), + KEY_METADATA.clone(), + SPLIT_OFFSETS.clone(), + EQUALITY_IDS.clone(), + SORT_ORDER_ID.clone(), + ] + } + + pub(super) fn data_file_schema_v2(partition_type: StructType) -> Result<AvroSchema, Error> { Review Comment: ```suggestion pub(super) fn data_file_schema_v2(partition_type: &StructType) -> Result<AvroSchema, Error> { ``` ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -1189,6 +1211,47 @@ impl DataFile { self.sort_order_id } } + +/// Convert data files to avro bytes. +pub fn data_files_to_avro( Review Comment: To maintain api consistency, I would suggest to have sth like following: ```rust /// comments pub struct DataFileWriter<W> { writer: W, version: FormatVersion, partition_type: &StructType } impl<W> DataFileWriter<W> { pub fn v1_writer(w: W, partition_type: &StructType) { } } impl<W: Write> DataFileWriter { pub fn append(data_file: &DataFile) { ... } pub fn fluse(mut self) {...} } ``` It would be better to have similar data structures for reader. ########## crates/iceberg/src/spec/manifest.rs: ########## @@ -665,62 +697,52 @@ mod _const_schema { Arc::new(NestedField::required( 2, "data_file", - Type::Struct(StructType::new(vec![ - CONTENT.clone(), - FILE_PATH.clone(), - FILE_FORMAT.clone(), - Arc::new(NestedField::required( - 102, - "partition", - Type::Struct(partition_type), - )), - RECORD_COUNT.clone(), - FILE_SIZE_IN_BYTES.clone(), - COLUMN_SIZES.clone(), - VALUE_COUNTS.clone(), - NULL_VALUE_COUNTS.clone(), - NAN_VALUE_COUNTS.clone(), - LOWER_BOUNDS.clone(), - UPPER_BOUNDS.clone(), - KEY_METADATA.clone(), - SPLIT_OFFSETS.clone(), - EQUALITY_IDS.clone(), - SORT_ORDER_ID.clone(), - ])), + Type::Struct(StructType::new(data_file_fields_v2(partition_type))), )), ]; let schema = Schema::builder().with_fields(fields).build()?; schema_to_avro_schema("manifest_entry", &schema) } + fn data_file_fields_v1(partition_type: StructType) -> Vec<NestedFieldRef> { Review Comment: ```suggestion fn data_file_fields_v1(partition_type: &StructType) -> Vec<NestedFieldRef> { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org