liurenjie1024 commented on code in PR #6:
URL: https://github.com/apache/iceberg-rust/pull/6#discussion_r1271809860
##########
src/spec/datatypes.rs:
##########
@@ -0,0 +1,408 @@
+/*!
+ * Data Types
+*/
+use std::{fmt, ops::Index};
+
+use serde::{
+ de::{Error, IntoDeserializer},
+ Deserialize, Deserializer, Serialize, Serializer,
+};
+
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[serde(untagged)]
+/// All data types are either primitives or nested types, which are maps,
+/// lists, or structs.
+///
+/// The enum is `#[serde(untagged)]`: no variant tag is written when
+/// serializing, and deserialization tries the variants in declaration order
+/// and keeps the first one that matches.
+pub enum Type {
+ /// Primitive types
+ Primitive(PrimitiveType),
+ /// Struct type
+ Struct(StructType),
+ /// List type.
+ List(ListType),
+ /// Map type
+ Map(MapType),
+}
+
+impl fmt::Display for Type {
+    /// Writes the primitive's own textual form for `Primitive`, and the bare
+    /// kind name (`struct`, `list` or `map`) for the nested types.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let kind = match self {
+            // Primitives know how to print themselves; delegate and finish.
+            Type::Primitive(inner) => return write!(f, "{}", inner),
+            Type::Struct(_) => "struct",
+            Type::List(_) => "list",
+            Type::Map(_) => "map",
+        };
+        f.write_str(kind)
+    }
+}
+
+/// Primitive data types
+///
+/// `remote = "Self"` makes the serde derives generate inherent
+/// `PrimitiveType::serialize` / `PrimitiveType::deserialize` functions rather
+/// than the trait impls. The hand-written `Serialize` / `Deserialize` impls
+/// for this type call those functions for the plain variants and handle
+/// `Decimal` and `Fixed` themselves.
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[serde(rename_all = "lowercase", remote = "Self")]
+pub enum PrimitiveType {
+ /// True or False
+ Boolean,
+ /// 32-bit signed integer
+ Int,
+ /// 64-bit signed integer
+ Long,
+ /// 32-bit IEEE 754 floating point.
+ Float,
+ /// 64-bit IEEE 754 floating point.
+ Double,
+ /// Fixed point decimal
+ Decimal {
+ /// Precision
+ precision: u32,
+ /// Scale
+ scale: u32,
+ },
+ /// Calendar date without timezone or time.
+ Date,
+ /// Time of day without date or timezone.
+ Time,
+ /// Timestamp without timezone
+ Timestamp,
+ /// Timestamp with timezone
+ Timestamptz,
+ /// Arbitrary-length character sequences
+ String,
+ /// Universally Unique Identifiers
+ Uuid,
+ /// Fixed length byte array
+ Fixed(u64),
+ /// Arbitrary-length byte array.
+ Binary,
+}
+
+impl<'de> Deserialize<'de> for PrimitiveType {
+    /// Reads the type as a string and dispatches on its prefix: `decimal…`
+    /// and `fixed…` go to their dedicated parsers, everything else goes to
+    /// the derived (inherent) `PrimitiveType::deserialize`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let raw = String::deserialize(deserializer)?;
+
+        if raw.starts_with("decimal") {
+            return deserialize_decimal(raw.into_deserializer());
+        }
+        if raw.starts_with("fixed") {
+            return deserialize_fixed(raw.into_deserializer());
+        }
+
+        // Plain variant: let the derived implementation match the name.
+        PrimitiveType::deserialize(raw.into_deserializer())
+    }
+}
+
+impl Serialize for PrimitiveType {
+    /// Serializes `Decimal` and `Fixed` through their string helpers and
+    /// every other variant through the derived (inherent)
+    /// `PrimitiveType::serialize`.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if let PrimitiveType::Decimal { precision, scale } = self {
+            return serialize_decimal(precision, scale, serializer);
+        }
+        if let PrimitiveType::Fixed(length) = self {
+            return serialize_fixed(length, serializer);
+        }
+
+        PrimitiveType::serialize(self, serializer)
+    }
+}
+
+/// Parses the string form `decimal(P,S)` into [`PrimitiveType::Decimal`].
+///
+/// Whitespace around `P` and `S` is ignored. The input must start with
+/// `decimal(`, end with `)` and contain a comma between the two numbers.
+///
+/// # Errors
+///
+/// Returns a custom deserialization error when the input is not a string,
+/// does not have the `decimal(P,S)` shape, or when `P` or `S` does not parse
+/// as a `u32`.
+fn deserialize_decimal<'de, D>(deserializer: D) -> Result<PrimitiveType, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let s = String::deserialize(deserializer)?;
+    // `strip_prefix` / `strip_suffix` remove the delimiters exactly once and
+    // fail when they are missing, unlike `trim_*_matches`, which strips
+    // repeatedly and silently accepts input without them.
+    let (precision, scale) = s
+        .strip_prefix("decimal(")
+        .and_then(|rest| rest.strip_suffix(')'))
+        .and_then(|args| args.split_once(','))
+        .ok_or_else(|| {
+            // `format!` is required here: a bare string literal would print
+            // the placeholder `{s}` verbatim instead of the offending input.
+            D::Error::custom(format!("Decimal requires precision and scale: {s}"))
+        })?;
+
+    Ok(PrimitiveType::Decimal {
+        precision: precision.trim().parse().map_err(D::Error::custom)?,
+        scale: scale.trim().parse().map_err(D::Error::custom)?,
+    })
+}
+
+/// Serializes a decimal type as the string `decimal(<precision>,<scale>)`.
+fn serialize_decimal<S>(precision: &u32, scale: &u32, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    let rendered = format!("decimal({precision},{scale})");
+    serializer.serialize_str(&rendered)
+}
+
+/// Parses the string form `fixed[L]` into [`PrimitiveType::Fixed`].
+///
+/// The input must start with `fixed[` and end with `]`; the text in between
+/// is parsed as a `u64`.
+///
+/// # Errors
+///
+/// Returns a custom deserialization error when the input is not a string,
+/// does not have the `fixed[L]` shape, or when `L` does not parse as a `u64`.
+fn deserialize_fixed<'de, D>(deserializer: D) -> Result<PrimitiveType, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let s = String::deserialize(deserializer)?;
+    // Borrow the length out of `s` instead of copying it into a new String,
+    // and require both brackets to be present exactly once.
+    let length = s
+        .strip_prefix("fixed[")
+        .and_then(|rest| rest.strip_suffix(']'))
+        .ok_or_else(|| D::Error::custom(format!("Fixed requires a length: {s}")))?;
+
+    length
+        .parse()
+        .map(PrimitiveType::Fixed)
+        .map_err(D::Error::custom)
+}
+
+/// Serializes a fixed-length binary type as the string `fixed[<value>]`.
+fn serialize_fixed<S>(value: &u64, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    let rendered = format!("fixed[{value}]");
+    serializer.serialize_str(&rendered)
+}
+
+impl fmt::Display for PrimitiveType {
+    /// Writes the lowercase name of the primitive type.
+    ///
+    /// `Decimal` and `Fixed` print only their base name (`decimal`, `fixed`);
+    /// their parameters are not included in the output.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let name = match self {
+            PrimitiveType::Boolean => "boolean",
+            PrimitiveType::Int => "int",
+            PrimitiveType::Long => "long",
+            PrimitiveType::Float => "float",
+            PrimitiveType::Double => "double",
+            PrimitiveType::Decimal { .. } => "decimal",
+            PrimitiveType::Date => "date",
+            PrimitiveType::Time => "time",
+            PrimitiveType::Timestamp => "timestamp",
+            PrimitiveType::Timestamptz => "timestamptz",
+            PrimitiveType::String => "string",
+            PrimitiveType::Uuid => "uuid",
+            PrimitiveType::Fixed(_) => "fixed",
+            PrimitiveType::Binary => "binary",
+        };
+        f.write_str(name)
+    }
+}
+
+/// DataType for a specific struct
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
+#[serde(rename = "struct", tag = "type")]
+pub struct StructType {
+ /// Struct fields
+ pub fields: Vec<StructField>,
Review Comment:
> I think it's not a blocker for merging this PR. We can add a new issue to
> track this instead.
I'm OK with adding the lookup table in a later PR; I just suggest making
`fields` private for now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]