wgtmac commented on code in PR #111: URL: https://github.com/apache/iceberg-cpp/pull/111#discussion_r2156289324
########## src/iceberg/type_fwd.h: ########## @@ -99,6 +99,9 @@ class TransformFunction; struct PartitionStatisticsFile; struct Snapshot; struct SnapshotRef; +struct SnapshotLogEntry; +struct MetadataLogEntry; Review Comment: Please either sort them alphabetically or add a blank line before `SnapshotLogEntry` so we can put these two entries together. ########## src/iceberg/table_metadata.cc: ########## @@ -76,6 +87,19 @@ Result<std::shared_ptr<SortOrder>> TableMetadata::SortOrder() const { return *iter; } +Result<std::shared_ptr<Snapshot>> TableMetadata::Snapshot() const { + if (current_snapshot_id == Snapshot::kInvalidSnapshotId) { + return NotFound("Current snapshot is not defined for this table"); + } Review Comment: ```suggestion ``` We don't need this special check because `kInvalidSnapshotId` can result in `NotFound` too. ########## src/iceberg/table.cc: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr<Schema>& Table::schema() const { + if (!schema_) { + const static std::shared_ptr<Schema> kEmptySchema = + std::make_shared<Schema>(std::vector<SchemaField>{}); + auto schema = metadata_->Schema(); + if (schema.has_value()) { + schema_ = schema.value(); + } else { + schema_ = kEmptySchema; + } + } + return schema_; +} + +const std::unordered_map<int32_t, std::shared_ptr<Schema>>& Table::schemas() const { + std::call_once(init_schemas_once_, [this]() { Review Comment: What about removing the once flag and use `std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>` instead? ########## src/iceberg/table_metadata.h: ########## @@ -129,12 +117,17 @@ struct ICEBERG_EXPORT TableMetadata { /// A `long` higher than all assigned row IDs int64_t next_row_id; + /// \brief Used for lazy initialization of schema + mutable std::once_flag init_schema_once; + Review Comment: ```suggestion ``` ########## src/iceberg/snapshot.h: ########## @@ -273,4 +273,20 @@ struct ICEBERG_EXPORT Snapshot { bool Equals(const Snapshot& other) const; }; +/// \brief Represents a snapshot log entry Review Comment: Why moving `SnapshotLogEntry` to this file? I think it is part of the table metadata and should still be there. ########## src/iceberg/table.h: ########## @@ -35,77 +36,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. + Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata, + std::string metadata_location, std::shared_ptr<FileIO> io, + std::shared_ptr<Catalog> catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table - virtual const std::shared_ptr<Schema>& schema() const = 0; + /// \brief Return the schema for this table, return empty schema if not found + const std::shared_ptr<Schema>& schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const = 0; + const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr<PartitionSpec>& spec() const = 0; + /// \brief Return the partition spec for this table, return null if default spec is not + /// found + const std::shared_ptr<PartitionSpec>& spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const; - /// \brief Return the sort order for this table - virtual const std::shared_ptr<SortOrder>& sort_order() const = 0; + /// \brief Return the sort order for this table, return null if default sort order is + /// not found + const std::shared_ptr<SortOrder>& sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map<std::string, std::string>& properties() const = 0; + const std::unordered_map<std::string, std::string>& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr<Snapshot>& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return null if not found + std::shared_ptr<Snapshot> current_snapshot() const; Review Comment: ```suggestion const std::shared_ptr<Snapshot>& current_snapshot() const; ``` Every table should have at least one snapshot. I think we can safely return a reference. ########## src/iceberg/table_metadata.h: ########## @@ -94,6 +80,8 @@ struct ICEBERG_EXPORT TableMetadata { TimePointMs last_updated_ms; /// The highest assigned column ID for the table int32_t last_column_id; + /// The current schema for the table, or null if not set + mutable std::shared_ptr<Schema> schema; Review Comment: ```suggestion ``` ########## src/iceberg/table.cc: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr<Schema>& Table::schema() const { + if (!schema_) { + const static std::shared_ptr<Schema> kEmptySchema = + std::make_shared<Schema>(std::vector<SchemaField>{}); + auto schema = metadata_->Schema(); + if (schema.has_value()) { + schema_ = schema.value(); Review Comment: ```suggestion schema_ = std::move(schema.value()); ``` ########## src/iceberg/table.h: ########## @@ -35,77 +36,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. Review Comment: Should we add a comment to say that the table is read-only if catalog is nullptr? ########## src/iceberg/table_metadata.cc: ########## @@ -47,13 +47,24 @@ std::string ToString(const MetadataLogEntry& entry) { } Result<std::shared_ptr<Schema>> TableMetadata::Schema() const { Review Comment: Please revert changes to `Schema()` so it is consistent with other similar functions in this file. We can cache these returned values in the `Table` object. ########## src/iceberg/table.h: ########## @@ -35,77 +36,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. + Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata, + std::string metadata_location, std::shared_ptr<FileIO> io, + std::shared_ptr<Catalog> catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table - virtual const std::shared_ptr<Schema>& schema() const = 0; + /// \brief Return the schema for this table, return empty schema if not found + const std::shared_ptr<Schema>& schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const = 0; + const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr<PartitionSpec>& spec() const = 0; + /// \brief Return the partition spec for this table, return null if default spec is not + /// found + const std::shared_ptr<PartitionSpec>& spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const; - /// \brief Return the sort order for this table - virtual const std::shared_ptr<SortOrder>& sort_order() const = 0; + /// \brief Return the sort order for this table, return null if default sort order is + /// not found + const std::shared_ptr<SortOrder>& sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map<std::string, std::string>& properties() const = 0; + const std::unordered_map<std::string, std::string>& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr<Snapshot>& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return null if not found + std::shared_ptr<Snapshot> current_snapshot() const; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual Result<std::shared_ptr<Snapshot>> snapshot(int64_t snapshot_id) const = 0; + std::shared_ptr<Snapshot> SnapshotById(int64_t snapshot_id) const; /// \brief Get the snapshots of this table - virtual const std::vector<std::shared_ptr<Snapshot>>& snapshots() const = 0; + const std::vector<std::shared_ptr<Snapshot>>& snapshots() const; /// \brief Get the snapshot history of this table /// /// \return a vector of history entries - virtual const std::vector<std::shared_ptr<HistoryEntry>>& history() const = 0; + const std::vector<SnapshotLogEntry>& history() const; - /// \brief Create a new table scan for this table - /// - /// Once a table scan is created, it can be refined to project columns and filter data. - virtual std::unique_ptr<TableScan> NewScan() const = 0; + private: + const TableIdentifier identifier_; + const std::shared_ptr<TableMetadata> metadata_; + const std::string metadata_location_; + std::shared_ptr<FileIO> io_; + std::shared_ptr<Catalog> catalog_; + + mutable std::shared_ptr<Schema> schema_; + mutable std::unordered_map<int32_t, std::shared_ptr<Schema>> schemas_map_; - /// \brief Create a new append API to add files to this table and commit - virtual std::shared_ptr<AppendFiles> NewAppend() = 0; + mutable std::shared_ptr<PartitionSpec> partition_spec_; + mutable std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> partition_spec_map_; - /// \brief Create a new transaction API to commit multiple table operations at once - virtual std::unique_ptr<Transaction> NewTransaction() = 0; + mutable std::shared_ptr<SortOrder> sort_order_; + mutable std::unordered_map<int32_t, std::shared_ptr<SortOrder>> sort_orders_map_; - /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an - /// IO-less design in the core library. - // /// \brief Returns a FileIO to read and write table data and metadata files - // virtual std::shared_ptr<FileIO> io() const = 0; Review Comment: I think we need to keep `io()`? ########## src/iceberg/table.h: ########## @@ -35,77 +36,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. + Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata, + std::string metadata_location, std::shared_ptr<FileIO> io, + std::shared_ptr<Catalog> catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table - virtual const std::shared_ptr<Schema>& schema() const = 0; + /// \brief Return the schema for this table, return empty schema if not found + const std::shared_ptr<Schema>& schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const = 0; + const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr<PartitionSpec>& spec() const = 0; + /// \brief Return the partition spec for this table, return null if default spec is not + /// found + const std::shared_ptr<PartitionSpec>& spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const; - /// \brief Return the sort order for this table - virtual const std::shared_ptr<SortOrder>& sort_order() const = 0; + /// \brief Return the sort order for this table, return null if default sort order is + /// not found + const std::shared_ptr<SortOrder>& sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map<std::string, std::string>& properties() const = 0; + const std::unordered_map<std::string, std::string>& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr<Snapshot>& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return null if not found + std::shared_ptr<Snapshot> current_snapshot() const; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual Result<std::shared_ptr<Snapshot>> snapshot(int64_t snapshot_id) const = 0; + std::shared_ptr<Snapshot> SnapshotById(int64_t snapshot_id) const; Review Comment: What not returning `Result<std::shared_ptr<Snapshot>>`? It might return `NotFound` IMO. ########## src/iceberg/table.h: ########## @@ -35,77 +36,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. + Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata, + std::string metadata_location, std::shared_ptr<FileIO> io, + std::shared_ptr<Catalog> catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table - virtual const std::shared_ptr<Schema>& schema() const = 0; + /// \brief Return the schema for this table, return empty schema if not found + const std::shared_ptr<Schema>& schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const = 0; + const std::unordered_map<int32_t, std::shared_ptr<Schema>>& schemas() const; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr<PartitionSpec>& spec() const = 0; + /// \brief Return the partition spec for this table, return null if default spec is not + /// found + const std::shared_ptr<PartitionSpec>& spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& specs() const; - /// \brief Return the sort order for this table - virtual const std::shared_ptr<SortOrder>& sort_order() const = 0; + /// \brief Return the sort order for this table, return null if default sort order is + /// not found + const std::shared_ptr<SortOrder>& sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() - const = 0; + const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>& sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map<std::string, std::string>& properties() const = 0; + const std::unordered_map<std::string, std::string>& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr<Snapshot>& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return null if not found + std::shared_ptr<Snapshot> current_snapshot() const; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual Result<std::shared_ptr<Snapshot>> snapshot(int64_t snapshot_id) const = 0; + std::shared_ptr<Snapshot> SnapshotById(int64_t snapshot_id) const; /// \brief Get the snapshots of this table - virtual const std::vector<std::shared_ptr<Snapshot>>& snapshots() const = 0; + const std::vector<std::shared_ptr<Snapshot>>& snapshots() const; /// \brief Get the snapshot history of this table /// /// \return a vector of history entries - virtual const std::vector<std::shared_ptr<HistoryEntry>>& history() const = 0; + const std::vector<SnapshotLogEntry>& history() const; - /// \brief Create a new table scan for this table - /// - /// Once a table scan is created, it can be refined to project columns and filter data. - virtual std::unique_ptr<TableScan> NewScan() const = 0; + private: + const TableIdentifier identifier_; + const std::shared_ptr<TableMetadata> metadata_; + const std::string metadata_location_; + std::shared_ptr<FileIO> io_; + std::shared_ptr<Catalog> catalog_; + + mutable std::shared_ptr<Schema> schema_; + mutable std::unordered_map<int32_t, std::shared_ptr<Schema>> schemas_map_; - /// \brief Create a new append API to add files to this table and commit - virtual std::shared_ptr<AppendFiles> NewAppend() = 0; + mutable std::shared_ptr<PartitionSpec> partition_spec_; + mutable std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> partition_spec_map_; - /// \brief Create a new transaction API to commit multiple table operations at once - virtual std::unique_ptr<Transaction> NewTransaction() = 0; + mutable std::shared_ptr<SortOrder> sort_order_; + mutable std::unordered_map<int32_t, std::shared_ptr<SortOrder>> sort_orders_map_; - /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an - /// IO-less design in the core library. - // /// \brief Returns a FileIO to read and write table data and metadata files - // virtual std::shared_ptr<FileIO> io() const = 0; + mutable std::shared_ptr<Snapshot> current_snapshot_; - /// \brief Returns a LocationProvider to provide locations for new data files - virtual std::unique_ptr<LocationProvider> location_provider() const = 0; + // once_flags Review Comment: I think a main shortcoming of these once flags is that if later we add a new `Table::Refresh()` function to be in sync with the latest metadata, these flags should also be cleared. I would suggest to remove these flags and solely rely on whether the shared_ptr is null to decide if it is initialized. ########## src/iceberg/table.cc: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr<Schema>& Table::schema() const { + if (!schema_) { + const static std::shared_ptr<Schema> kEmptySchema = + std::make_shared<Schema>(std::vector<SchemaField>{}); + auto schema = metadata_->Schema(); + if (schema.has_value()) { + schema_ = schema.value(); + } else { + schema_ = kEmptySchema; + } + } + return schema_; +} + +const std::unordered_map<int32_t, std::shared_ptr<Schema>>& Table::schemas() const { + std::call_once(init_schemas_once_, [this]() { + for (const auto& schema : metadata_->schemas) { + if (schema->schema_id()) { + schemas_map_.emplace(schema->schema_id().value(), schema); + } + } + }); + return schemas_map_; +} + +const std::shared_ptr<PartitionSpec>& Table::spec() const { + std::call_once(init_partition_spec_once_, [this]() { Review Comment: I'd suggest to follow similar implementation as `Table::schema()`. In this function, we can cache and return `PartitionSpec::Unpartitioned()` when missing. ########## src/iceberg/table_metadata.h: ########## @@ -94,6 +80,8 @@ struct ICEBERG_EXPORT TableMetadata { TimePointMs last_updated_ms; /// The highest assigned column ID for the table int32_t last_column_id; + /// The current schema for the table, or null if not set + mutable std::shared_ptr<Schema> schema; Review Comment: See my comment in `table_metadata.cc` to revert these changes. ########## src/iceberg/table.cc: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr<Schema>& Table::schema() const { + if (!schema_) { + const static std::shared_ptr<Schema> kEmptySchema = + std::make_shared<Schema>(std::vector<SchemaField>{}); + auto schema = metadata_->Schema(); + if (schema.has_value()) { + schema_ = schema.value(); + } else { + schema_ = kEmptySchema; + } + } + return schema_; +} + +const std::unordered_map<int32_t, std::shared_ptr<Schema>>& Table::schemas() const { + std::call_once(init_schemas_once_, [this]() { + for (const auto& schema : metadata_->schemas) { + if (schema->schema_id()) { + schemas_map_.emplace(schema->schema_id().value(), schema); + } + } + }); + return schemas_map_; +} + +const std::shared_ptr<PartitionSpec>& Table::spec() const { + std::call_once(init_partition_spec_once_, [this]() { + auto partition_spec = metadata_->PartitionSpec(); + if (partition_spec.has_value()) { + partition_spec_ = partition_spec.value(); + } + }); + return partition_spec_; +} + +const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>& Table::specs() const { + std::call_once(init_partition_specs_once_, [this]() { + for (const auto& spec : metadata_->partition_specs) { + partition_spec_map_[spec->spec_id()] = spec; + } + }); + return partition_spec_map_; +} + +const std::shared_ptr<SortOrder>& Table::sort_order() const { Review Comment: Same as above but return `SortOrder::Unsorted()` when missing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org