nullccxsy commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2295547882


##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdToFieldVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+  std::string new_short_path;
+  if (field.type()->type_id() == TypeId::kStruct) {
+    new_short_path = short_path;
+  } else {
+    new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+  }
+  name_to_id_[new_path] = field.field_id();

Review Comment:
   Let's look at the Java implementation:
   ```
   public Map<String, Integer> byName() {
       ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder();
       builder.putAll(nameToId);
       // add all short names that do not conflict with canonical names
       shortNameToId.entrySet().stream()
           .filter(entry -> !nameToId.containsKey(entry.getKey()))
           .forEach(builder::put);
       return builder.build();
     }
   ```
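   `nameToId` contains all canonical paths (`new_path`), and `shortNameToId` contains all short paths (`short_path`).
   This code means that if a `new_short_path` doesn't conflict with any canonical path,
then it is added to the resulting name-to-ID map. So
   use `emplace` to add the short path, since `emplace` does not overwrite an existing key.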
   
   Use `[]` to add the canonical path, since `[]` overwrites any existing entry; that way all canonical paths are retained.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to