Xuanwo commented on code in PR #89: URL: https://github.com/apache/iceberg-rust/pull/89#discussion_r1377416089
########## crates/catalog/rest/src/catalog.rs: ########## @@ -312,11 +316,43 @@ impl Catalog for RestCatalog { } /// Load table from the catalog. - async fn load_table(&self, _table: &TableIdent) -> Result<Table> { - Err(Error::new( - ErrorKind::FeatureUnsupported, - "Creating table not supported yet!", - )) + async fn load_table(&self, table: &TableIdent) -> Result<Table> { + let request = self + .client + .0 + .get(self.config.table_endpoint(table)) + .build()?; + + let resp = self + .client + .query::<LoadTableResponse, ErrorResponse, OK>(request) + .await?; + + let mut props = self.config.props.clone(); + if let Some(config) = resp.config { + props.extend(config); + } + + let file_io = match self + .config + .warehouse + .as_ref() + .or_else(|| resp.metadata_location.as_ref()) + { + Some(url) => FileIO::from_path(url)?.with_props(props).build()?, + None => FileIOBuilder::new("s3").with_props(props).build()?, Review Comment: Take `s3` as default seems not cool, how could we support azblob, gcs and so on? ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { Review Comment: Maybe we can make `FormatVersion` copy? ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { Review Comment: `Uuid` is Copy. ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { + &self.table_uuid + } + + /// Returns table location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns last sequence number. + #[inline] + pub fn last_sequence_number(&self) -> i64 { + self.last_sequence_number + } + + /// Returns last updated time. + #[inline] + pub fn last_updated_ms(&self) -> i64 { Review Comment: Could we return a Datetime for simpler usage? ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { + &self.table_uuid + } + + /// Returns table location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns last sequence number. + #[inline] + pub fn last_sequence_number(&self) -> i64 { + self.last_sequence_number + } + + /// Returns last updated time. + #[inline] + pub fn last_updated_ms(&self) -> i64 { + self.last_updated_ms + } + + /// Returns schemas + #[inline] + pub fn schemas(&self) -> impl Iterator<Item = &SchemaRef> { + self.schemas.values() + } + + /// Lookup schema by id. + #[inline] + pub fn schema_by_id(&self, schema_id: i32) -> Option<&SchemaRef> { + self.schemas.get(&schema_id) + } + /// Get current schema #[inline] - pub fn current_schema(&self) -> Result<Arc<Schema>, Error> { - self.schemas - .get(&self.current_schema_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Schema id {} not found!", self.current_schema_id), - ) - }) - .cloned() + pub fn current_schema(&self) -> &SchemaRef { + self.schema_by_id(self.current_schema_id) + .expect("Current schema id set, but not found in table metadata") + } + + /// Returns all partition specs. + #[inline] + pub fn partition_specs(&self) -> impl Iterator<Item = &PartitionSpecRef> { + self.partition_specs.values() } + + /// Lookup partition spec by id. + #[inline] + pub fn partition_spec_by_id(&self, spec_id: i32) -> Option<&PartitionSpecRef> { + self.partition_specs.get(&spec_id) + } + /// Get default partition spec #[inline] - pub fn default_partition_spec(&self) -> Result<&PartitionSpec, Error> { - self.partition_specs - .get(&self.default_spec_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Partition spec id {} not found!", self.default_spec_id), - ) - }) + pub fn default_partition_spec(&self) -> Option<&PartitionSpecRef> { + if self.default_spec_id == DEFAULT_SPEC_ID { + self.partition_spec_by_id(DEFAULT_SPEC_ID) + } else { + Some( + self.partition_spec_by_id(DEFAULT_SPEC_ID) + .expect("Default partition spec id set, but not found in table metadata"), + ) + } + } + + /// Returns all snapshots + #[inline] + pub fn snapshots(&self) -> impl Iterator<Item = &SnapshotRef> { Review Comment: Will `impl Iterator<Item = SnapshotRef>` simpler to use? Or just returns `Vec<SnapshotRef>`? It there a case that users want to have `snapshots` but not collect them? Or we could provide `snapshots` and `snapshots_iter`? The same question applies to all public API about table metadata. ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { + &self.table_uuid + } + + /// Returns table location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns last sequence number. + #[inline] + pub fn last_sequence_number(&self) -> i64 { + self.last_sequence_number + } + + /// Returns last updated time. + #[inline] + pub fn last_updated_ms(&self) -> i64 { + self.last_updated_ms + } + + /// Returns schemas + #[inline] + pub fn schemas(&self) -> impl Iterator<Item = &SchemaRef> { + self.schemas.values() + } + + /// Lookup schema by id. + #[inline] + pub fn schema_by_id(&self, schema_id: i32) -> Option<&SchemaRef> { + self.schemas.get(&schema_id) + } + /// Get current schema #[inline] - pub fn current_schema(&self) -> Result<Arc<Schema>, Error> { - self.schemas - .get(&self.current_schema_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Schema id {} not found!", self.current_schema_id), - ) - }) - .cloned() + pub fn current_schema(&self) -> &SchemaRef { + self.schema_by_id(self.current_schema_id) + .expect("Current schema id set, but not found in table metadata") + } + + /// Returns all partition specs. + #[inline] + pub fn partition_specs(&self) -> impl Iterator<Item = &PartitionSpecRef> { + self.partition_specs.values() } + + /// Lookup partition spec by id. + #[inline] + pub fn partition_spec_by_id(&self, spec_id: i32) -> Option<&PartitionSpecRef> { Review Comment: The same as `SchemaRef`. ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { + &self.table_uuid + } + + /// Returns table location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns last sequence number. + #[inline] + pub fn last_sequence_number(&self) -> i64 { + self.last_sequence_number + } + + /// Returns last updated time. + #[inline] + pub fn last_updated_ms(&self) -> i64 { + self.last_updated_ms + } + + /// Returns schemas + #[inline] + pub fn schemas(&self) -> impl Iterator<Item = &SchemaRef> { + self.schemas.values() + } + + /// Lookup schema by id. + #[inline] + pub fn schema_by_id(&self, schema_id: i32) -> Option<&SchemaRef> { + self.schemas.get(&schema_id) + } + /// Get current schema #[inline] - pub fn current_schema(&self) -> Result<Arc<Schema>, Error> { - self.schemas - .get(&self.current_schema_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Schema id {} not found!", self.current_schema_id), - ) - }) - .cloned() + pub fn current_schema(&self) -> &SchemaRef { + self.schema_by_id(self.current_schema_id) + .expect("Current schema id set, but not found in table metadata") + } + + /// Returns all partition specs. + #[inline] + pub fn partition_specs(&self) -> impl Iterator<Item = &PartitionSpecRef> { + self.partition_specs.values() } + + /// Lookup partition spec by id. + #[inline] + pub fn partition_spec_by_id(&self, spec_id: i32) -> Option<&PartitionSpecRef> { + self.partition_specs.get(&spec_id) + } + /// Get default partition spec #[inline] - pub fn default_partition_spec(&self) -> Result<&PartitionSpec, Error> { - self.partition_specs - .get(&self.default_spec_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Partition spec id {} not found!", self.default_spec_id), - ) - }) + pub fn default_partition_spec(&self) -> Option<&PartitionSpecRef> { + if self.default_spec_id == DEFAULT_SPEC_ID { + self.partition_spec_by_id(DEFAULT_SPEC_ID) + } else { + Some( + self.partition_spec_by_id(DEFAULT_SPEC_ID) + .expect("Default partition spec id set, but not found in table metadata"), + ) + } + } + + /// Returns all snapshots + #[inline] + pub fn snapshots(&self) -> impl Iterator<Item = &SnapshotRef> { + self.snapshots.values() + } + + /// Lookup snapshot by id. + #[inline] + pub fn snapshot_by_id(&self, snapshot_id: i64) -> Option<&SnapshotRef> { + self.snapshots.get(&snapshot_id) + } + + /// Returns snapshot history. + #[inline] + pub fn history(&self) -> &[SnapshotLog] { + &self.snapshot_log } /// Get current snapshot #[inline] - pub fn current_snapshot(&self) -> Result<Option<Arc<Snapshot>>, Error> { - match (&self.current_snapshot_id, &self.snapshots) { - (Some(snapshot_id), Some(snapshots)) => Ok(snapshots.get(snapshot_id).cloned()), - (Some(-1), None) => Ok(None), - (None, None) => Ok(None), - (Some(_), None) => Err(Error::new( - ErrorKind::DataInvalid, - "Snapshot id is provided but there are no snapshots".to_string(), - )), - (None, Some(_)) => Err(Error::new( - ErrorKind::DataInvalid, - "There are snapshots but no snapshot id is provided".to_string(), - )), + pub fn current_snapshot(&self) -> Option<&SnapshotRef> { + self.current_snapshot_id.map(|s| { + self.snapshot_by_id(s) + .expect("Current snapshot id has been set, but doesn't exist in metadata") + }) + } + + /// Return all sort orders. + #[inline] + pub fn sort_orders(&self) -> impl Iterator<Item = &SortOrderRef> { + self.sort_orders.values() + } + + /// Lookup sort order by id. + #[inline] + pub fn sort_order_by_id(&self, sort_order_id: i64) -> Option<&SortOrderRef> { + self.sort_orders.get(&sort_order_id) + } + + /// Returns default sort order id. + #[inline] + pub fn default_sort_order(&self) -> Option<&SortOrderRef> { + if self.default_sort_order_id == DEFAULT_SORT_ORDER_ID { + self.sort_orders.get(&DEFAULT_SORT_ORDER_ID) + } else { + Some( + self.sort_orders + .get(&self.default_sort_order_id) + .expect("Default order id has been set, but not found in table metadata!"), + ) } } + /// Returns properties of table. + #[inline] + pub fn properties(&self) -> &HashMap<String, String> { Review Comment: I readly want rust has read_only public fields... ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -174,23 +264,9 @@ impl TableMetadata { ) }); - if let Some(snapshots) = &mut self.snapshots { - self.snapshot_log.push(snapshot.log()); - snapshots.insert(snapshot.snapshot_id(), Arc::new(snapshot)); - } else { - if !self.snapshot_log.is_empty() { - return Err(Error::new( - ErrorKind::DataInvalid, - "Snapshot logs is empty while snapshots is not!", - )); - } - - self.snapshot_log = vec![snapshot.log()]; - self.snapshots = Some(HashMap::from_iter(vec![( - snapshot.snapshot_id(), - Arc::new(snapshot), - )])); - } + self.snapshot_log.push(snapshot.log()); + self.snapshots + .insert(snapshot.snapshot_id(), Arc::new(snapshot)); Review Comment: Maybe we can change this API into non-failable? ########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -109,50 +110,139 @@ pub struct TableMetadata { } impl TableMetadata { + /// Returns format version of this metadata. + #[inline] + pub fn format_version(&self) -> FormatVersion { + self.format_version.clone() + } + + /// Returns uuid of current table. + #[inline] + pub fn uuid(&self) -> &Uuid { + &self.table_uuid + } + + /// Returns table location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns last sequence number. + #[inline] + pub fn last_sequence_number(&self) -> i64 { + self.last_sequence_number + } + + /// Returns last updated time. + #[inline] + pub fn last_updated_ms(&self) -> i64 { + self.last_updated_ms + } + + /// Returns schemas + #[inline] + pub fn schemas(&self) -> impl Iterator<Item = &SchemaRef> { + self.schemas.values() + } + + /// Lookup schema by id. + #[inline] + pub fn schema_by_id(&self, schema_id: i32) -> Option<&SchemaRef> { Review Comment: `SchemaRef` is a `Arc` that cheap to clone, how about returning `SchemaRef` directly instead of `&SchemaRef`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org