HuaHuaY commented on code in PR #386:
URL: https://github.com/apache/iceberg-cpp/pull/386#discussion_r2593272962
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
Review Comment:
How about using reference instead of raw pointer?
##########
src/iceberg/catalog/memory/in_memory_catalog.cc:
##########
@@ -394,7 +411,32 @@ Result<std::unique_ptr<Table>>
InMemoryCatalog::UpdateTable(
const TableIdentifier& identifier,
const std::vector<std::unique_ptr<TableRequirement>>& requirements,
const std::vector<std::unique_ptr<TableUpdate>>& updates) {
- return NotImplemented("update table");
+ std::unique_lock lock(mutex_);
Review Comment:
```suggestion
std::lock_guard guard(mutex_);
```
Use `std::lock_guard` instead of `std::unique_lock` if we don't need to move
the guard. Anyone interested can help modify the other `std::unique_lock` in
this file.
##########
src/iceberg/table_metadata.h:
##########
@@ -458,14 +467,41 @@ enum class ICEBERG_EXPORT MetadataFileCodecType {
kGzip,
};
-/// \brief Utility class for table metadata
-struct ICEBERG_EXPORT TableMetadataUtil {
+struct ICEBERG_EXPORT CodecTypeUtil {
+ /// \brief Returns the MetadataFileCodecType corresponding to the given
string.
+ ///
+ /// \param name The string to parse.
+ /// \return The MetadataFileCodecType corresponding to the given string.
+ static Result<MetadataFileCodecType> CodecFromString(const std::string_view&
name);
+
/// \brief Get the codec type from the table metadata file name.
///
/// \param file_name The name of the table metadata file.
/// \return The codec type of the table metadata file.
static Result<MetadataFileCodecType> CodecFromFileName(std::string_view
file_name);
+ /// \brief Get the file extension from the codec type.
+ /// \param codec The codec name.
+ /// \return The file extension of the codec.
+ static Result<std::string> CodecNameToFileExtension(const std::string_view&
codec);
+
+ /// \brief Get the file extension from the codec type.
+ /// \param codec The codec type.
+ /// \return The file extension of the codec.
+ static std::string CodecTypeToFileExtension(MetadataFileCodecType codec);
+
+ inline static constexpr std::string_view kTableMetadataFileSuffix =
".metadata.json";
Review Comment:
`static constexpr` is implicitly `inline` and we can remove `inline` here.
##########
src/iceberg/catalog/memory/in_memory_catalog.cc:
##########
@@ -314,6 +324,13 @@ Result<std::string>
InMemoryNamespace::GetTableMetadataLocation(
return it->second;
}
+Status InMemoryNamespace::UpdateTableMetadataLocation(
+ TableIdentifier const& table_ident, std::string const& metadata_location) {
Review Comment:
```suggestion
const TableIdentifier& table_ident, const std::string&
metadata_location) {
```
I think it's better to keep the same style `const T&` as function
declaration.
##########
src/iceberg/table_metadata.cc:
##########
@@ -664,7 +781,32 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataBuilder::Build() {
std::chrono::system_clock::now().time_since_epoch())};
}
- // 4. Create and return the TableMetadata
+ // 4. Buildup metadata_log from base metadata
+ int32_t max_metadata_log_size =
TableProperties::kMetadataPreviousVersionsMax.value();
+ if (auto iter = impl_->metadata.properties.find(
+ TableProperties::kMetadataPreviousVersionsMax.key());
+ iter != impl_->metadata.properties.end()) {
+ try {
+ max_metadata_log_size = std::stoi(iter->second);
Review Comment:
ditto
##########
src/iceberg/catalog/memory/in_memory_catalog.cc:
##########
@@ -394,7 +411,32 @@ Result<std::unique_ptr<Table>>
InMemoryCatalog::UpdateTable(
const TableIdentifier& identifier,
const std::vector<std::unique_ptr<TableRequirement>>& requirements,
const std::vector<std::unique_ptr<TableUpdate>>& updates) {
- return NotImplemented("update table");
+ std::unique_lock lock(mutex_);
+ ICEBERG_ASSIGN_OR_RAISE(auto metadata_location,
+
root_namespace_->GetTableMetadataLocation(identifier));
+
+ ICEBERG_ASSIGN_OR_RAISE(auto base,
+ TableMetadataUtil::Read(*file_io_,
metadata_location));
+ base->metadata_file_location = metadata_location;
Review Comment:
```suggestion
base->metadata_file_location = std::move(metadata_location);
```
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
+ TableMetadata* metadata) {
+ ICEBERG_CHECK(metadata != nullptr, "The metadata is nullptr.");
+
+ int version = -1;
+ if (base != nullptr && !base->metadata_file_location.empty()) {
+ // parse current version from location
+ version = ParseVersionFromLocation(base->metadata_file_location);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(std::string new_file_location,
+ NewTableMetadataFilePath(*metadata, version + 1));
+ ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, *metadata));
+ metadata->metadata_file_location = std::move(new_file_location);
+ return {};
+}
+
Status TableMetadataUtil::Write(FileIO& io, const std::string& location,
const TableMetadata& metadata) {
auto json = ToJson(metadata);
ICEBERG_ASSIGN_OR_RAISE(auto json_string, ToJsonString(json));
return io.WriteFile(location, json_string);
}
+void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const
TableMetadata* base,
+ const TableMetadata*
metadata) {
+ if (!base) {
+ return;
+ }
+
+ bool delete_after_commit =
TableProperties::kMetadataDeleteAfterCommitEnabled.value();
+ if (auto it = metadata->properties.find(
+ TableProperties::kMetadataDeleteAfterCommitEnabled.key());
+ it != metadata->properties.end()) {
+ delete_after_commit = StringUtils::ToLower(it->second) == "true" ||
it->second == "1";
Review Comment:
```suggestion
delete_after_commit = StringUtils::EqualsIgnoreCase(it->second, "true")
|| it->second == "1";
```
`StringUtils::ToLower` allocates new memory and
`StringUtils::EqualsIgnoreCase` doesn't.
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
+ TableMetadata* metadata) {
+ ICEBERG_CHECK(metadata != nullptr, "The metadata is nullptr.");
+
+ int version = -1;
+ if (base != nullptr && !base->metadata_file_location.empty()) {
+ // parse current version from location
+ version = ParseVersionFromLocation(base->metadata_file_location);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(std::string new_file_location,
+ NewTableMetadataFilePath(*metadata, version + 1));
Review Comment:
How about using `0` as init value and removing `+ 1`?
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
+ TableMetadata* metadata) {
+ ICEBERG_CHECK(metadata != nullptr, "The metadata is nullptr.");
+
+ int version = -1;
+ if (base != nullptr && !base->metadata_file_location.empty()) {
+ // parse current version from location
+ version = ParseVersionFromLocation(base->metadata_file_location);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(std::string new_file_location,
+ NewTableMetadataFilePath(*metadata, version + 1));
+ ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, *metadata));
+ metadata->metadata_file_location = std::move(new_file_location);
+ return {};
+}
+
Status TableMetadataUtil::Write(FileIO& io, const std::string& location,
const TableMetadata& metadata) {
auto json = ToJson(metadata);
ICEBERG_ASSIGN_OR_RAISE(auto json_string, ToJsonString(json));
return io.WriteFile(location, json_string);
}
+void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const
TableMetadata* base,
+ const TableMetadata*
metadata) {
+ if (!base) {
+ return;
+ }
+
+ bool delete_after_commit =
TableProperties::kMetadataDeleteAfterCommitEnabled.value();
+ if (auto it = metadata->properties.find(
+ TableProperties::kMetadataDeleteAfterCommitEnabled.key());
+ it != metadata->properties.end()) {
+ delete_after_commit = StringUtils::ToLower(it->second) == "true" ||
it->second == "1";
+ }
+
+ if (delete_after_commit) {
+ std::ranges::for_each(
+ base->metadata_log |
+ std::views::filter(
+ [current_files =
+ metadata->metadata_log |
+ std::ranges::to<std::unordered_set<MetadataLogEntry,
+
MetadataLogEntry::Hasher>>()](
+ const auto& entry) { return
!current_files.contains(entry); }),
+ [&io](const auto& entry) { auto status =
io.DeleteFile(entry.metadata_file); });
Review Comment:
```suggestion
auto current_files =
metadata->metadata_log |
std::ranges::to<std::unordered_set<MetadataLogEntry,
MetadataLogEntry::Hasher>>();
std::ranges::for_each(
base->metadata_log | std::views::filter([¤t_files](const auto&
entry) {
return !current_files.contains(entry);
}),
[&io](const auto& entry) { std::ignore =
io.DeleteFile(entry.metadata_file); });
```
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
+ TableMetadata* metadata) {
+ ICEBERG_CHECK(metadata != nullptr, "The metadata is nullptr.");
+
+ int version = -1;
+ if (base != nullptr && !base->metadata_file_location.empty()) {
+ // parse current version from location
+ version = ParseVersionFromLocation(base->metadata_file_location);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(std::string new_file_location,
+ NewTableMetadataFilePath(*metadata, version + 1));
+ ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, *metadata));
+ metadata->metadata_file_location = std::move(new_file_location);
+ return {};
+}
+
Status TableMetadataUtil::Write(FileIO& io, const std::string& location,
const TableMetadata& metadata) {
auto json = ToJson(metadata);
ICEBERG_ASSIGN_OR_RAISE(auto json_string, ToJsonString(json));
return io.WriteFile(location, json_string);
}
+void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const
TableMetadata* base,
+ const TableMetadata*
metadata) {
+ if (!base) {
+ return;
+ }
+
+ bool delete_after_commit =
TableProperties::kMetadataDeleteAfterCommitEnabled.value();
+ if (auto it = metadata->properties.find(
+ TableProperties::kMetadataDeleteAfterCommitEnabled.key());
+ it != metadata->properties.end()) {
+ delete_after_commit = StringUtils::ToLower(it->second) == "true" ||
it->second == "1";
+ }
+
+ if (delete_after_commit) {
+ std::ranges::for_each(
+ base->metadata_log |
+ std::views::filter(
+ [current_files =
+ metadata->metadata_log |
+ std::ranges::to<std::unordered_set<MetadataLogEntry,
+
MetadataLogEntry::Hasher>>()](
+ const auto& entry) { return
!current_files.contains(entry); }),
+ [&io](const auto& entry) { auto status =
io.DeleteFile(entry.metadata_file); });
+ }
+}
+
+int TableMetadataUtil::ParseVersionFromLocation(const std::string&
metadata_location) {
+ size_t version_start = metadata_location.find_last_of('/') + 1;
+ size_t version_end = metadata_location.find('-', version_start);
+
+ if (version_end == std::string::npos) {
+ // found filesystem table's metadata
+ return -1;
+ }
+
+ try {
+ return std::stoi(
+ metadata_location.substr(version_start, version_end - version_start));
+ } catch (const std::exception& e) {
+ // Unable to parse version from metadata location
+ return -1;
+ }
+}
+
+Result<std::string> TableMetadataUtil::NewTableMetadataFilePath(const
TableMetadata& meta,
+ int
new_version) {
+ std::string codec_name = "none";
Review Comment:
```suggestion
std::string_view codec_name = "none";
```
##########
src/iceberg/table_metadata.cc:
##########
@@ -258,13 +290,95 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Status TableMetadataUtil::Write(FileIO& io, const TableMetadata* base,
+ TableMetadata* metadata) {
+ ICEBERG_CHECK(metadata != nullptr, "The metadata is nullptr.");
+
+ int version = -1;
+ if (base != nullptr && !base->metadata_file_location.empty()) {
+ // parse current version from location
+ version = ParseVersionFromLocation(base->metadata_file_location);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(std::string new_file_location,
+ NewTableMetadataFilePath(*metadata, version + 1));
+ ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, *metadata));
+ metadata->metadata_file_location = std::move(new_file_location);
+ return {};
+}
+
Status TableMetadataUtil::Write(FileIO& io, const std::string& location,
const TableMetadata& metadata) {
auto json = ToJson(metadata);
ICEBERG_ASSIGN_OR_RAISE(auto json_string, ToJsonString(json));
return io.WriteFile(location, json_string);
}
+void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const
TableMetadata* base,
+ const TableMetadata*
metadata) {
+ if (!base) {
+ return;
+ }
+
+ bool delete_after_commit =
TableProperties::kMetadataDeleteAfterCommitEnabled.value();
+ if (auto it = metadata->properties.find(
+ TableProperties::kMetadataDeleteAfterCommitEnabled.key());
+ it != metadata->properties.end()) {
+ delete_after_commit = StringUtils::ToLower(it->second) == "true" ||
it->second == "1";
+ }
+
+ if (delete_after_commit) {
+ std::ranges::for_each(
+ base->metadata_log |
+ std::views::filter(
+ [current_files =
+ metadata->metadata_log |
+ std::ranges::to<std::unordered_set<MetadataLogEntry,
+
MetadataLogEntry::Hasher>>()](
+ const auto& entry) { return
!current_files.contains(entry); }),
+ [&io](const auto& entry) { auto status =
io.DeleteFile(entry.metadata_file); });
+ }
+}
+
+int TableMetadataUtil::ParseVersionFromLocation(const std::string&
metadata_location) {
+ size_t version_start = metadata_location.find_last_of('/') + 1;
+ size_t version_end = metadata_location.find('-', version_start);
+
+ if (version_end == std::string::npos) {
+ // found filesystem table's metadata
+ return -1;
+ }
+
+ try {
+ return std::stoi(
Review Comment:
If we ensure there are only numbers here, we can use `std::from_chars` here.
It doesn't need to use `substr` to create a new string and is faster than
`std::stoi`.
```cpp
#include <charconv>
int result;
if (std::from_chars(metadata_location.data() + version_start,
metadata_location.data() + version_end, result)
.ec == std::errc()) {
return result;
} else {
return -1;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]