wgtmac commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2295521040
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
+ int32_t field_id) const {
+ ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+ auto it = id_to_field_.find(field_id);
+ if (it == id_to_field_.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
id_to_field)
+ : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+ if (type.is_nested()) {
+ ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+ }
+ return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+ const auto& nested = iceberg::internal::checked_cast<const
NestedType&>(type);
+ const auto& fields = nested.fields();
+ for (const auto& field : fields) {
+ id_to_field_.emplace(field.field_id(), std::cref(field));
+ ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+ }
+ return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>&
name_to_id,
+ bool case_sensitive)
+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& field = type.fields()[0];
+ std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+ std::string new_short_path;
+ if (field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
Review Comment:
When would the case described as `if new_path confict with prev new_short_path,
we should discard new_short_path` actually happen? Could you provide a concrete
example?
If it can indeed happen, then `new_short_path` is wrong, so shouldn't we instead
pass `new_path` as the `short_path` argument to the `Visit` call for its child?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]