liurenjie1024 commented on code in PR #797:
URL: https://github.com/apache/iceberg-rust/pull/797#discussion_r1899889761


##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -656,6 +656,38 @@ mod _const_schema {
         })
     };
 
+    fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> {

Review Comment:
   ```suggestion
       fn data_file_fields_v2(partition_type: &StructType) -> 
Vec<NestedFieldRef> {
   ```



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -656,6 +656,38 @@ mod _const_schema {
         })
     };
 
+    fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> {
+        vec![
+            CONTENT.clone(),
+            FILE_PATH.clone(),
+            FILE_FORMAT.clone(),
+            Arc::new(NestedField::required(
+                102,
+                "partition",
+                Type::Struct(partition_type),
+            )),
+            RECORD_COUNT.clone(),
+            FILE_SIZE_IN_BYTES.clone(),
+            COLUMN_SIZES.clone(),
+            VALUE_COUNTS.clone(),
+            NULL_VALUE_COUNTS.clone(),
+            NAN_VALUE_COUNTS.clone(),
+            LOWER_BOUNDS.clone(),
+            UPPER_BOUNDS.clone(),
+            KEY_METADATA.clone(),
+            SPLIT_OFFSETS.clone(),
+            EQUALITY_IDS.clone(),
+            SORT_ORDER_ID.clone(),
+        ]
+    }
+
+    pub(super) fn data_file_schema_v2(partition_type: StructType) -> 
Result<AvroSchema, Error> {

Review Comment:
   ```suggestion
       pub(super) fn data_file_schema_v2(partition_type: &StructType) -> 
Result<AvroSchema, Error> {
   ```



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -1189,6 +1211,47 @@ impl DataFile {
         self.sort_order_id
     }
 }
+
+/// Convert data files to avro bytes.
+pub fn data_files_to_avro(

Review Comment:
   To maintain api consistency, I would suggest to have sth like following:
   
   ```rust
   /// comments
   pub struct DataFileWriter<W> {
     writer: W,
     version: FormatVersion,
     partition_type: &StructType
   }
   
   impl<W> DataFileWriter<W> {
     pub fn v1_writer(w: W, partition_type: &StructType) {
     }
   }
   
   impl<W: Write> DataFileWriter {
     pub fn append(data_file: &DataFile) { ... }
     pub fn fluse(mut self) {...}
   }
   ```
   
   It would be better to have similar data structures for reader. 



##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -665,62 +697,52 @@ mod _const_schema {
             Arc::new(NestedField::required(
                 2,
                 "data_file",
-                Type::Struct(StructType::new(vec![
-                    CONTENT.clone(),
-                    FILE_PATH.clone(),
-                    FILE_FORMAT.clone(),
-                    Arc::new(NestedField::required(
-                        102,
-                        "partition",
-                        Type::Struct(partition_type),
-                    )),
-                    RECORD_COUNT.clone(),
-                    FILE_SIZE_IN_BYTES.clone(),
-                    COLUMN_SIZES.clone(),
-                    VALUE_COUNTS.clone(),
-                    NULL_VALUE_COUNTS.clone(),
-                    NAN_VALUE_COUNTS.clone(),
-                    LOWER_BOUNDS.clone(),
-                    UPPER_BOUNDS.clone(),
-                    KEY_METADATA.clone(),
-                    SPLIT_OFFSETS.clone(),
-                    EQUALITY_IDS.clone(),
-                    SORT_ORDER_ID.clone(),
-                ])),
+                
Type::Struct(StructType::new(data_file_fields_v2(partition_type))),
             )),
         ];
         let schema = Schema::builder().with_fields(fields).build()?;
         schema_to_avro_schema("manifest_entry", &schema)
     }
 
+    fn data_file_fields_v1(partition_type: StructType) -> Vec<NestedFieldRef> {

Review Comment:
   ```suggestion
       fn data_file_fields_v1(partition_type: &StructType) -> 
Vec<NestedFieldRef> {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to