alamb commented on code in PR #7795:
URL: https://github.com/apache/arrow-rs/pull/7795#discussion_r2173197366


##########
parquet-variant/src/builder.rs:
##########
@@ -248,6 +254,45 @@ impl MetadataBuilder {
     fn metadata_size(&self) -> usize {
         self.field_names.iter().map(|k| k.len()).sum()
     }
+
+    fn finish(self) -> Vec<u8> {
+        let nkeys = self.num_field_names();
+
+        // Calculate metadata size
+        let total_dict_size: usize = self.metadata_size();
+
+        // Determine appropriate offset size based on the larger of dict size 
or total string size
+        let max_offset = std::cmp::max(total_dict_size, nkeys);
+        let offset_size = int_size(max_offset);
+
+        let offset_start = 1 + offset_size as usize;
+        let string_start = offset_start + (nkeys + 1) * offset_size as usize;
+        let metadata_size = string_start + total_dict_size;
+
+        let mut metadata = Vec::with_capacity(metadata_size);
+
+        // Write header: version=1, not sorted, with calculated offset_size
+        metadata.push(0x01 | ((offset_size - 1) << 6));
+
+        // Write dictionary size
+        write_offset(&mut metadata, nkeys, offset_size);
+
+        // Write offsets
+        let mut cur_offset = 0;
+        for key in self.field_names.iter() {
+            write_offset(&mut metadata, cur_offset, offset_size);
+            cur_offset += key.len();
+        }
+        // Write final offset
+        write_offset(&mut metadata, cur_offset, offset_size);
+
+        // Write string data
+        for key in self.field_names.iter() {

Review Comment:
   - filed https://github.com/apache/arrow-rs/issues/7814



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to