liurenjie1024 commented on code in PR #169:
URL: https://github.com/apache/iceberg-rust/pull/169#discussion_r1503606792


##########
crates/iceberg/src/spec/values.rs:
##########
@@ -70,6 +79,571 @@ pub enum PrimitiveLiteral {
     Decimal(i128),
 }
 
+/// Literal associated with its type. The value and type pair is checked when 
construction, so the type and value is
+/// guaranteed to be correct when used.
+///
+/// By default we decouple the type and value of a literal, so we can use 
avoid the cost of storing extra type info
+/// for each literal. But associate type with literal can be useful in some 
cases, for example, in unbound expression.
+#[derive(Debug)]
+pub struct Datum {
+    r#type: PrimitiveType,
+    literal: PrimitiveLiteral,
+}
+
+impl Display for Datum {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match (&self.r#type, &self.literal) {
+            (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Int(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Long(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Float(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Double(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Date(val)) => write!(f, "{}", 
days_to_date(*val)),
+            (_, PrimitiveLiteral::Time(val)) => write!(f, "{}", 
microseconds_to_time(*val)),
+            (_, PrimitiveLiteral::Timestamp(val)) => {
+                write!(f, "{}", microseconds_to_datetime(*val))
+            }
+            (_, PrimitiveLiteral::TimestampTZ(val)) => {
+                write!(f, "{}", microseconds_to_datetimetz(*val))
+            }
+            (_, PrimitiveLiteral::String(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::UUID(val)) => write!(f, "{}", val),
+            (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f),
+            (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
+            (
+                PrimitiveType::Decimal {
+                    precision: _,
+                    scale,
+                },
+                PrimitiveLiteral::Decimal(val),
+            ) => {
+                write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale))
+            }
+            (_, _) => {
+                unreachable!()
+            }
+        }
+    }
+}
+
+fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
+    let mut s = String::with_capacity(bytes.len() * 2);
+    for b in bytes {
+        s.push_str(&format!("{:02X}", b));
+    }
+    f.write_str(&s)
+}
+
+impl From<Datum> for Literal {
+    fn from(value: Datum) -> Self {
+        Literal::Primitive(value.literal)
+    }
+}
+
+impl Datum {
+    /// Creates a boolean value.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::bool(true);
+    ///
+    /// assert_eq!(format!("{}", t), "true".to_string());
+    /// assert_eq!(Literal::from(t), 
Literal::Primitive(PrimitiveLiteral::Boolean(true)));
+    /// ```
+    pub fn bool<T: Into<bool>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Boolean,
+            literal: PrimitiveLiteral::Boolean(t.into()),
+        }
+    }
+
+    /// Creates a boolean value from string.
+    /// See [Parse bool from 
str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool)
 for reference.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::bool_from_str("false").unwrap();
+    ///
+    /// assert_eq!(&format!("{}", t), "false");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Boolean(false)), 
t.into());
+    /// ```
+    pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let v = s.as_ref().parse::<bool>().map_err(|e| {
+            Error::new(ErrorKind::DataInvalid, "Can't parse string to 
bool.").with_source(e)
+        })?;
+        Ok(Self::bool(v))
+    }
+
+    /// Creates an 32bit integer.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::int(23i8);
+    ///
+    /// assert_eq!(&format!("{}", t), "23");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
+    /// ```
+    pub fn int<T: Into<i32>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Int,
+            literal: PrimitiveLiteral::Int(t.into()),
+        }
+    }
+
+    /// Creates an 64bit integer.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::long(24i8);
+    ///
+    /// assert_eq!(&format!("{t}"), "24");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
+    /// ```
+    pub fn long<T: Into<i64>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Long,
+            literal: PrimitiveLiteral::Long(t.into()),
+        }
+    }
+
+    /// Creates an 32bit floating point number.
+    ///
+    /// Example:
+    /// ```rust
+    /// use ordered_float::OrderedFloat;
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::float( 32.1f32 );
+    ///
+    /// assert_eq!(&format!("{t}"), "32.1");
+    /// 
assert_eq!(Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))), 
t.into());
+    /// ```
+    pub fn float<T: Into<f32>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Float,
+            literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
+        }
+    }
+
+    /// Creates an 32bit floating point number.
+    ///
+    /// Example:
+    /// ```rust
+    /// use ordered_float::OrderedFloat;
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// let t = Datum::double( 32.1f64 );
+    ///
+    /// assert_eq!(&format!("{t}"), "32.1");
+    /// 
assert_eq!(Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))), 
t.into());
+    /// ```
+    pub fn double<T: Into<f64>>(t: T) -> Self {
+        Self {
+            r#type: PrimitiveType::Double,
+            literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
+        }
+    }
+
+    /// Creates date literal from number of days from unix epoch directly.
+    ///
+    /// Example:
+    /// ```rust
+    ///
+    /// use iceberg::spec::{Literal, PrimitiveLiteral, Datum};
+    /// // 2 days after 1970-01-01
+    /// let t = Datum::date(2);
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-03");
+    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Date(2)), t.into());
+    /// ```
+    pub fn date(days: i32) -> Self {
+        Self {
+            r#type: PrimitiveType::Date,
+            literal: PrimitiveLiteral::Date(days),
+        }
+    }
+
+    /// Creates a date in `%Y-%m-%d` format, assume in utc timezone.
+    ///
+    /// See [`NaiveDate::from_str`].
+    ///
+    /// Example
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::date_from_str("1970-01-05").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-05");
+    /// assert_eq!(Literal::date(4), t.into());
+    /// ```
+    pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't parse date from string: {}", s.as_ref()),
+            )
+            .with_source(e)
+        })?;
+
+        Ok(Self::date(date_from_naive_date(t)))
+    }
+
+    /// Create a date from calendar date (year, month and day).
+    ///
+    /// See [`NaiveDate::from_ymd_opt`].
+    ///
+    /// Example:
+    ///
+    ///```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-05");
+    /// assert_eq!(Literal::date(4), t.into());
+    /// ```
+    pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
+        let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't create date from year: {year}, month: {month}, 
day: {day}"),
+            )
+        })?;
+
+        Ok(Self::date(date_from_naive_date(t)))
+    }
+
+    /// Creates time in microseconds directly.
+    ///
+    /// It will returns error when it's negative or too large to fit in 24 
hours.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let micro_secs = {
+    ///     1 * 3600 * 1_000_000 + // 1 hour
+    ///     2 * 60 * 1_000_000 +   // 2 minutes
+    ///     1 * 1_000_000 + // 1 second
+    ///     888999  // microseconds
+    ///  };
+    ///
+    /// let t = Datum::time(micro_secs).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+    /// assert_eq!(Literal::time(micro_secs), t.into());
+    ///
+    /// let negative_value = -100;
+    /// assert!(Datum::time(negative_value).is_err());
+    ///
+    /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in 
24 hours.
+    /// assert!(Datum::time(too_large_value).is_err());
+    /// ```
+    pub fn time(value: i64) -> Result<Self> {
+        ensure_data_valid!(
+            (0..=MAX_TIME_VALUE).contains(&value),
+            "Invalid value for Time type: {}",
+            value
+        );
+
+        Ok(Self {
+            r#type: PrimitiveType::Time,
+            literal: PrimitiveLiteral::Time(value),
+        })
+    }
+
+    /// Creates time literal from [`chrono::NaiveTime`].
+    fn time_from_naive_time(t: NaiveTime) -> Self {
+        let duration = t - unix_epoch().time();
+        // It's safe to unwrap here since less than 24 hours will never 
overflow.
+        let micro_secs = duration.num_microseconds().unwrap();
+
+        Self {
+            r#type: PrimitiveType::Time,
+            literal: PrimitiveLiteral::Time(micro_secs),
+        }
+    }
+
+    /// Creates time in microseconds in `%H:%M:%S:.f` format.
+    ///
+    /// See [`NaiveTime::from_str`] for details.
+    ///
+    /// Example:
+    /// ```rust
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
+    /// ```
+    pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
+        let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't parse time from string: {}", s.as_ref()),
+            )
+            .with_source(e)
+        })?;
+
+        Ok(Self::time_from_naive_time(t))
+    }
+
+    /// Creates time literal from hour, minute, second, and microseconds.
+    ///
+    /// See [`NaiveTime::from_hms_micro_opt`].
+    ///
+    /// Example:
+    /// ```rust
+    ///
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "22:15:33.000111");
+    /// ```
+    pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> 
Result<Self> {
+        let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
+            .ok_or_else(|| Error::new(
+                ErrorKind::DataInvalid,
+                format!("Can't create time from hour: {hour}, min: {min}, 
second: {sec}, microsecond: {micro}"),
+            ))?;
+        Ok(Self::time_from_naive_time(t))
+    }
+
+    /// Creates a timestamp from unix epoch in microseconds.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamp(1000);
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
+    /// ```
+    pub fn timestamp(value: i64) -> Self {
+        Self {
+            r#type: PrimitiveType::Timestamp,
+            literal: PrimitiveLiteral::Timestamp(value),
+        }
+    }
+
+    /// Creates a timestamp from [`DateTime`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    ///
+    /// use chrono::{TimeZone, Utc};
+    /// use iceberg::spec::Datum;
+    /// let t = Datum::timestamp_from_datetime(Utc.timestamp_opt(1000, 
0).unwrap());
+    ///
+    /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40");
+    /// ```
+    pub fn timestamp_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
+        Self::timestamp(dt.with_timezone(&Utc).timestamp_micros())
+    }
+
+    /// Parse a timestamp in RFC3339 format.
+    ///
+    /// See [`DateTime<Utc>::from_str`].
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, 
NaiveTime};
+    /// use iceberg::spec::{Literal, Datum};
+    /// let t = Datum::timestamp_from_str("2012-12-12 
12:12:12.8899-04:00").unwrap();
+    ///
+    /// assert_eq!(&format!("{t}"), "2012-12-12 16:12:12.889900");
+    /// ```
+    pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {

Review Comment:
   > Another option is to detect if there is a timezone postfix (-4:00) and 
based on that return a timestamp or timestamptz
   
   Good suggestion. I think we should provide another API for this option, while 
keeping the original one, since it's also useful.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to