fqaiser94 commented on code in PR #765:
URL: https://github.com/apache/iceberg-rust/pull/765#discussion_r1912192560


##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -0,0 +1,777 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::{HashMap, HashSet};
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+use crate::io::{FileRead, InputFile};
+use crate::puffin::compression::CompressionCodec;
+use crate::{Error, ErrorKind, Result};
+
+/// Human-readable identification of the application writing the file, along 
with its version.
+/// Example: "Trino version 381"
+pub(crate) const CREATED_BY_PROPERTY: &str = "created-by";
+
+/// Metadata about a blob.
+/// For more information, see: 
https://iceberg.apache.org/puffin-spec/#blobmetadata
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) struct BlobMetadata {
+    /// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types
+    pub(crate) r#type: String,
+    /// List of field IDs the blob was computed for; the order of items is 
used to compute sketches stored in the blob.
+    pub(crate) fields: Vec<i32>,
+    /// ID of the Iceberg table's snapshot the blob was computed from
+    pub(crate) snapshot_id: i64,
+    /// Sequence number of the Iceberg table's snapshot the blob was computed 
from
+    pub(crate) sequence_number: i64,
+    /// The offset in the file where the blob contents start
+    pub(crate) offset: u64,
+    /// The length of the blob stored in the file (after compression, if 
compressed)
+    pub(crate) length: usize,
+    /// The compression codec used to compress the data
+    #[serde(skip_serializing_if = "CompressionCodec::is_none")]
+    #[serde(default)]
+    pub(crate) compression_codec: CompressionCodec,
+    /// Arbitrary meta-information about the blob
+    #[serde(skip_serializing_if = "HashMap::is_empty")]
+    #[serde(default)]
+    pub(crate) properties: HashMap<String, String>,
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub(crate) enum Flag {
+    FooterPayloadCompressed = 0,
+}
+
+impl Flag {
+    pub(crate) fn byte_idx(self) -> u8 {
+        (self as u8) / 8
+    }
+
+    pub(crate) fn bit_idx(self) -> u8 {
+        (self as u8) % 8
+    }
+
+    fn matches(self, byte_idx: &u8, bit_idx: &u8) -> bool {

Review Comment:
   > It's trivial, but passing function parameters by reference for primitive 
types is not rusty.
   
   Fixed!
   
   > The spec says flags is always 32 bit(4 bytes). How about we just give Flag 
an index into u32? e.g. 0 byte 0 bit is 0, 1 byte 0 bit is 8, 1 byte 1 bit is 9
   
   I did try to implement this suggestion, but I found the syntax inconvenient 
if we want to throw an error for unknown byte + bit combinations, so I decided 
against keeping it. Cool trick though, I didn't know we could do this in Rust!



##########
crates/iceberg/src/puffin/metadata.rs:
##########
@@ -0,0 +1,777 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::{HashMap, HashSet};
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+use crate::io::{FileRead, InputFile};
+use crate::puffin::compression::CompressionCodec;
+use crate::{Error, ErrorKind, Result};
+
+/// Human-readable identification of the application writing the file, along 
with its version.
+/// Example: "Trino version 381"
+pub(crate) const CREATED_BY_PROPERTY: &str = "created-by";
+
+/// Metadata about a blob.
+/// For more information, see: 
https://iceberg.apache.org/puffin-spec/#blobmetadata
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) struct BlobMetadata {
+    /// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types
+    pub(crate) r#type: String,
+    /// List of field IDs the blob was computed for; the order of items is 
used to compute sketches stored in the blob.
+    pub(crate) fields: Vec<i32>,
+    /// ID of the Iceberg table's snapshot the blob was computed from
+    pub(crate) snapshot_id: i64,
+    /// Sequence number of the Iceberg table's snapshot the blob was computed 
from
+    pub(crate) sequence_number: i64,
+    /// The offset in the file where the blob contents start
+    pub(crate) offset: u64,
+    /// The length of the blob stored in the file (after compression, if 
compressed)
+    pub(crate) length: usize,
+    /// The compression codec used to compress the data
+    #[serde(skip_serializing_if = "CompressionCodec::is_none")]
+    #[serde(default)]
+    pub(crate) compression_codec: CompressionCodec,
+    /// Arbitrary meta-information about the blob
+    #[serde(skip_serializing_if = "HashMap::is_empty")]
+    #[serde(default)]
+    pub(crate) properties: HashMap<String, String>,
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub(crate) enum Flag {
+    FooterPayloadCompressed = 0,
+}
+
+impl Flag {
+    pub(crate) fn byte_idx(self) -> u8 {
+        (self as u8) / 8
+    }
+
+    pub(crate) fn bit_idx(self) -> u8 {
+        (self as u8) % 8
+    }
+
+    fn matches(self, byte_idx: &u8, bit_idx: &u8) -> bool {
+        &self.byte_idx() == byte_idx && &self.bit_idx() == bit_idx
+    }
+
+    fn from(byte_idx: &u8, bit_idx: &u8) -> Result<Flag> {

Review Comment:
   Fixed!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to