liurenjie1024 commented on code in PR #1207:
URL: https://github.com/apache/iceberg-rust/pull/1207#discussion_r2081297329


##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -285,7 +305,67 @@ impl FileMetadata {
 
         let footer_payload_str =
             FileMetadata::extract_footer_payload_as_str(&footer_bytes, 
footer_payload_length)?;
+
         FileMetadata::from_json_str(&footer_payload_str)
+        //

Review Comment:
   Remove this?



##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -264,6 +280,10 @@ impl FileMetadata {
     }
 
     /// Returns the file metadata about a Puffin file
+    ///
+    /// `prefetch_hint` is used to try to fetch the entire footer in one read. 
If
+    /// the entire footer isn't fetched in one read the function will call the 
`read_no_prefetch`
+    /// option.

Review Comment:
   Move this `read_with_prefetch`?



##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -285,7 +305,67 @@ impl FileMetadata {
 
         let footer_payload_str =
             FileMetadata::extract_footer_payload_as_str(&footer_bytes, 
footer_payload_length)?;
+
         FileMetadata::from_json_str(&footer_payload_str)
+        //
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn read_with_prefetch(
+        input_file: &InputFile,
+        prefetch_hint: u8,

Review Comment:
   It would be better to explain the meaning of this argument in doc.



##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -285,7 +305,67 @@ impl FileMetadata {
 
         let footer_payload_str =
             FileMetadata::extract_footer_payload_as_str(&footer_bytes, 
footer_payload_length)?;
+
         FileMetadata::from_json_str(&footer_payload_str)
+        //
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn read_with_prefetch(
+        input_file: &InputFile,
+        prefetch_hint: u8,
+    ) -> Result<FileMetadata> {
+        if prefetch_hint > 16 {
+            let input_file_length = input_file.metadata().await?.size;
+            let file_read = input_file.reader().await?;
+
+            // Hint cannot be larger than input file
+            if prefetch_hint as u64 > input_file_length {
+                return FileMetadata::read(input_file).await;
+            }
+
+            // Read footer based on prefetchi hint
+            let start = input_file_length - prefetch_hint as u64;
+            let end = input_file_length;
+            let footer_bytes = file_read.read(start..end).await?;
+
+            let payload_length_start =
+                footer_bytes.len() - (FileMetadata::FOOTER_STRUCT_LENGTH as 
usize);
+            let payload_length_end =
+                payload_length_start + 
(FileMetadata::FOOTER_STRUCT_PAYLOAD_LENGTH_LENGTH as usize);
+            let payload_length_bytes = 
&footer_bytes[payload_length_start..payload_length_end];
+
+            let mut buf = [0; 4];
+            buf.copy_from_slice(payload_length_bytes);
+            let footer_payload_length = u32::from_le_bytes(buf);
+
+            // If the (footer payload length + FOOTER_STRUCT_LENGTH + 
MAGIC_LENGTH) is greater
+            // than the fetched footer then you can have it read regularly 
from a read with no
+            // prefetch while passing in the footer_payload_length.
+            let footer_length = (footer_payload_length as usize)
+                + FileMetadata::FOOTER_STRUCT_LENGTH as usize
+                + FileMetadata::MAGIC_LENGTH as usize;
+            if footer_length > prefetch_hint as usize {
+                return FileMetadata::read(input_file).await;

Review Comment:
   Ditto.



##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -285,7 +305,67 @@ impl FileMetadata {
 
         let footer_payload_str =
             FileMetadata::extract_footer_payload_as_str(&footer_bytes, 
footer_payload_length)?;
+
         FileMetadata::from_json_str(&footer_payload_str)
+        //
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn read_with_prefetch(
+        input_file: &InputFile,
+        prefetch_hint: u8,
+    ) -> Result<FileMetadata> {
+        if prefetch_hint > 16 {
+            let input_file_length = input_file.metadata().await?.size;
+            let file_read = input_file.reader().await?;
+
+            // Hint cannot be larger than input file
+            if prefetch_hint as u64 > input_file_length {
+                return FileMetadata::read(input_file).await;

Review Comment:
   It would be better to add some warning log here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to