wgtmac commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2292928827


##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {
+ public:
+  explicit IdVisitor(
+      std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
+          id_to_field);
+  Status Visit(const Type& type);
+  Status VisitNestedType(const Type& type);
+
+ private:
+  std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field_;
+};
+class NametoIdVisitor {

Review Comment:
   ```suggestion
   
   class NameToIdVisitor {
   ```



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {

Review Comment:
   ```suggestion
   
   class IdToFieldVisitor {
   ```



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"

Review Comment:
   ```suggestion
   #include "iceberg/util/visit_type.h"
   
   ```



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {
+ public:
+  explicit IdVisitor(
+      std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
+          id_to_field);
+  Status Visit(const Type& type);
+  Status VisitNestedType(const Type& type);
+
+ private:
+  std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field_;
+};
+class NametoIdVisitor {
+ public:
+  explicit NametoIdVisitor(std::unordered_map<std::string, size_t>& name_to_id,
+                           bool case_sensitive_ = true);
+  Status Visit(const ListType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const MapType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const StructType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const PrimitiveType& type, const std::string& path,
+               const std::string& short_path);
+  static std::string GetPath(const std::string& last_path, const std::string& 
field_name,
+                             bool case_sensitive = true);
 
+ private:
+  bool case_sensitive_;
+  std::unordered_map<std::string, size_t>& name_to_id_;
+};

Review Comment:
   ```suggestion
   };
   
   ```



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {
+ public:
+  explicit IdVisitor(
+      std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
+          id_to_field);
+  Status Visit(const Type& type);
+  Status VisitNestedType(const Type& type);
+
+ private:
+  std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field_;
+};
+class NametoIdVisitor {
+ public:
+  explicit NametoIdVisitor(std::unordered_map<std::string, size_t>& name_to_id,
+                           bool case_sensitive_ = true);
+  Status Visit(const ListType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const MapType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const StructType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const PrimitiveType& type, const std::string& path,
+               const std::string& short_path);
+  static std::string GetPath(const std::string& last_path, const std::string& 
field_name,
+                             bool case_sensitive = true);

Review Comment:
   ```suggestion
                                bool case_sensitive);
   ```
   
   We don't need default parameter for GetPath.



##########
src/iceberg/schema.cc:
##########
@@ -44,4 +75,156 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name) const {
+  return FindFieldByName(name, /*case_sensitive*/ true);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdVisitor::IdVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, size_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+  std::string short_full_path;
+  if (field.type()->type_id() == TypeId::kStruct) {
+    short_full_path = short_path;
+  } else {
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+  }
+  name_to_id_[full_path] = field.field_id();
+  name_to_id_.emplace(short_full_path, field.field_id());
+  ICEBERG_RETURN_UNEXPECTED(
+      VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
+                              const std::string& short_path) {
+  std::string full_path, short_full_path;
+  const auto& fields = type.fields();
+  for (const auto& field : fields) {
+    full_path = GetPath(path, std::string(field.name()), case_sensitive_);
+    if (field.name() == MapType::kValueName &&

Review Comment:
   Why not shortening the path for `MapType::kKeyName`?



##########
src/iceberg/schema.cc:
##########
@@ -44,4 +75,156 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name) const {
+  return FindFieldByName(name, /*case_sensitive*/ true);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdVisitor::IdVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, size_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+  std::string short_full_path;
+  if (field.type()->type_id() == TypeId::kStruct) {
+    short_full_path = short_path;
+  } else {
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+  }
+  name_to_id_[full_path] = field.field_id();
+  name_to_id_.emplace(short_full_path, field.field_id());

Review Comment:
   We need to check its return value just in case a duplicate path exists.



##########
src/iceberg/schema.cc:
##########
@@ -44,4 +75,156 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name) const {
+  return FindFieldByName(name, /*case_sensitive*/ true);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdVisitor::IdVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, size_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+  std::string short_full_path;

Review Comment:
   ```suggestion
     std::string new_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
     std::string new_short_path;
   ```
   
   The `short_full_path` is a little bit confusing. I'd suggest renaming them 
like this.



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {

Review Comment:
   Let's have a clear name



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,44 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 namespace iceberg {
+class IdVisitor {
+ public:
+  explicit IdVisitor(
+      std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
+          id_to_field);
+  Status Visit(const Type& type);
+  Status VisitNestedType(const Type& type);
+
+ private:
+  std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field_;
+};
+class NametoIdVisitor {
+ public:
+  explicit NametoIdVisitor(std::unordered_map<std::string, size_t>& name_to_id,
+                           bool case_sensitive_ = true);
+  Status Visit(const ListType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const MapType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const StructType& type, const std::string& path,
+               const std::string& short_path);
+  Status Visit(const PrimitiveType& type, const std::string& path,
+               const std::string& short_path);
+  static std::string GetPath(const std::string& last_path, const std::string& 
field_name,
+                             bool case_sensitive = true);
 
+ private:
+  bool case_sensitive_;
+  std::unordered_map<std::string, size_t>& name_to_id_;

Review Comment:
   ```suggestion
     std::unordered_map<std::string, int32_t>& name_to_id_;
   ```
   
   field id should use int32_t



##########
src/iceberg/schema.h:
##########
@@ -54,13 +55,40 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  ///\brief Find thd SchemaField By field name
+  ///
+  /// Short names for maps and lists are included for any name that does not 
conflict with
+  /// a canonical name. For example, a list, 'l', of structs with field 'x' 
will produce
+  /// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', 
if its
+  /// value include a structs with field 'x' wil produce short name 'm.x' in 
addition to
+  /// canonical name 'm.value.x'
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldByName(std::string_view name, bool case_sensitive) const;
+
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldByName(std::string_view name) const;
+
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldById(int32_t field_id) const;
+
   friend bool operator==(const Schema& lhs, const Schema& rhs) { return 
lhs.Equals(rhs); }
 
+  /// Mapping from field id to field.
+  mutable std::unordered_map<int32_t, std::reference_wrapper<const 
SchemaField>>
+      id_to_field_;
+  /// Mapping from field name to id of field.
+  mutable std::unordered_map<std::string, size_t> name_to_id_;
+  /// Mapping from field lowercase_name(suppoert case_insensitive query) to id 
of field
+  mutable std::unordered_map<std::string, size_t> lowercase_name_to_id_;

Review Comment:
   ```suggestion
     mutable std::unordered_map<int32_t, std::reference_wrapper<const 
SchemaField>>
         id_to_field_;
     /// Mapping from field name to field id.
     mutable std::unordered_map<std::string, int32_t> name_to_id_;
     /// Mapping from lowercased field name to field id
     mutable std::unordered_map<std::string, int32_t> lowercase_name_to_id_;
   ```
   
   Field id should use int32_t per the spec



##########
src/iceberg/schema.cc:
##########
@@ -44,4 +75,156 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name) const {
+  return FindFieldByName(name, /*case_sensitive*/ true);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdVisitor::IdVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, size_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+  std::string short_full_path;
+  if (field.type()->type_id() == TypeId::kStruct) {
+    short_full_path = short_path;
+  } else {
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+  }
+  name_to_id_[full_path] = field.field_id();
+  name_to_id_.emplace(short_full_path, field.field_id());
+  ICEBERG_RETURN_UNEXPECTED(
+      VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
+                              const std::string& short_path) {
+  std::string full_path, short_full_path;
+  const auto& fields = type.fields();
+  for (const auto& field : fields) {
+    full_path = GetPath(path, std::string(field.name()), case_sensitive_);
+    if (field.name() == MapType::kValueName &&
+        field.type()->type_id() == TypeId::kStruct) {
+      short_full_path = short_path;
+    } else {
+      short_full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+    }
+    name_to_id_[full_path] = field.field_id();
+    name_to_id_.emplace(short_full_path, field.field_id());
+    ICEBERG_RETURN_UNEXPECTED(
+        VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  }
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const StructType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& fields = type.fields();
+  std::string full_path, short_full_path;
+  for (const auto& field : fields) {
+    full_path =
+        NametoIdVisitor::GetPath(path, std::string(field.name()), 
case_sensitive_);
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+    name_to_id_[full_path] = field.field_id();
+    name_to_id_.emplace(short_full_path, field.field_id());

Review Comment:
   Check the return value and error out if failed.



##########
src/iceberg/schema.h:
##########
@@ -54,13 +55,40 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  ///\brief Find thd SchemaField By field name
+  ///
+  /// Short names for maps and lists are included for any name that does not 
conflict with
+  /// a canonical name. For example, a list, 'l', of structs with field 'x' 
will produce
+  /// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', 
if its
+  /// value include a structs with field 'x' wil produce short name 'm.x' in 
addition to
+  /// canonical name 'm.value.x'
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldByName(std::string_view name, bool case_sensitive) const;
+
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldByName(std::string_view name) const;
+
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldById(int32_t field_id) const;
+
   friend bool operator==(const Schema& lhs, const Schema& rhs) { return 
lhs.Equals(rhs); }
 
+  /// Mapping from field id to field.
+  mutable std::unordered_map<int32_t, std::reference_wrapper<const 
SchemaField>>

Review Comment:
   ```suggestion
    private:
     /// Mapping from field id to field.
     mutable std::unordered_map<int32_t, std::reference_wrapper<const 
SchemaField>>
   ```
   
   We should mark these variables as private.



##########
test/schema_test.cc:
##########
@@ -81,3 +81,171 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+TEST(SchemaTest, NestedType) {

Review Comment:
   These test cases are a little bit hard to review. I'd suggest do the 
following:
   
   1. Give it a better name, like `TEST(SchemaTest, FindFieldByName)` or 
similar. If the test fails in the future, it might be hard to know what error 
it is at first glance.
   2. Break down them into smaller test cases and each one focuses on a single 
feature. For example, we can at least split them by features: FindFieldByName, 
FindFieldById. Or even better, we can split them into cases of different types 
including SimpleStruct, SimpleMap, SimpleList, StructOfStruct, MapOfStruct, 
ListOfStruct, etc. In that way, readers may know better what each case is for. 
If you want to reuse the complex schema, you can create a function to call it 
in each case.
   4. There should be some cases for invalid cases, like fields cannot be found 
by id or field name.



##########
src/iceberg/schema.cc:
##########
@@ -44,4 +75,156 @@ bool Schema::Equals(const Schema& other) const {
   return schema_id_ == other.schema_id_ && fields_ == other.fields_;
 }
 
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name, bool case_sensitive) const {
+  if (case_sensitive) {
+    ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+    auto it = name_to_id_.find(std::string(name));
+    if (it == name_to_id_.end()) return std::nullopt;
+    return FindFieldById(it->second);
+  }
+  ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+  std::string lower_name(name);
+  std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+  auto it = lowercase_name_to_id_.find(lower_name);
+  if (it == lowercase_name_to_id_.end()) return std::nullopt;
+  return FindFieldById(it->second);
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldByName(
+    std::string_view name) const {
+  return FindFieldByName(name, /*case_sensitive*/ true);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+  if (!id_to_field_.empty()) {
+    return {};
+  }
+  IdVisitor visitor(id_to_field_);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+  return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+  if (!name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(name_to_id_, true);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+  if (!lowercase_name_to_id_.empty()) {
+    return {};
+  }
+  std::string path, short_path;
+  NametoIdVisitor visitor(lowercase_name_to_id_, false);
+  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path, 
short_path));
+  return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>> 
Schema::FindFieldById(
+    int32_t field_id) const {
+  ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+  auto it = id_to_field_.find(field_id);
+  if (it == id_to_field_.end()) {
+    return std::nullopt;
+  }
+  return it->second;
+}
+
+IdVisitor::IdVisitor(
+    std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field)
+    : id_to_field_(id_to_field) {}
+
+Status IdVisitor::Visit(const Type& type) {
+  if (type.is_nested()) {
+    ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+  }
+  return {};
+}
+
+Status IdVisitor::VisitNestedType(const Type& type) {
+  const auto& nested = iceberg::internal::checked_cast<const 
NestedType&>(type);
+  const auto& fields = nested.fields();
+  for (const auto& field : fields) {
+    id_to_field_.emplace(field.field_id(), std::cref(field));
+    ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+  }
+  return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, size_t>& 
name_to_id,
+                                 bool case_sensitive)
+    : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& field = type.fields()[0];
+  std::string full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+  std::string short_full_path;
+  if (field.type()->type_id() == TypeId::kStruct) {
+    short_full_path = short_path;
+  } else {
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+  }
+  name_to_id_[full_path] = field.field_id();
+  name_to_id_.emplace(short_full_path, field.field_id());
+  ICEBERG_RETURN_UNEXPECTED(
+      VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
+                              const std::string& short_path) {
+  std::string full_path, short_full_path;
+  const auto& fields = type.fields();
+  for (const auto& field : fields) {
+    full_path = GetPath(path, std::string(field.name()), case_sensitive_);
+    if (field.name() == MapType::kValueName &&
+        field.type()->type_id() == TypeId::kStruct) {
+      short_full_path = short_path;
+    } else {
+      short_full_path = GetPath(path, std::string(field.name()), 
case_sensitive_);
+    }
+    name_to_id_[full_path] = field.field_id();
+    name_to_id_.emplace(short_full_path, field.field_id());
+    ICEBERG_RETURN_UNEXPECTED(
+        VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  }
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const StructType& type, const std::string& path,
+                              const std::string& short_path) {
+  const auto& fields = type.fields();
+  std::string full_path, short_full_path;
+  for (const auto& field : fields) {
+    full_path =
+        NametoIdVisitor::GetPath(path, std::string(field.name()), 
case_sensitive_);
+    short_full_path = GetPath(short_path, std::string(field.name()), 
case_sensitive_);
+    name_to_id_[full_path] = field.field_id();
+    name_to_id_.emplace(short_full_path, field.field_id());
+    ICEBERG_RETURN_UNEXPECTED(
+        VisitTypeInline(*field.type(), this, full_path, short_full_path));
+  }
+  return {};
+}
+
+Status NametoIdVisitor::Visit(const PrimitiveType& type, const std::string& 
path,
+                              const std::string& short_path) {
+  return {};
+}
+
+std::string NametoIdVisitor::GetPath(const std::string& last_path,
+                                     const std::string& field_name, bool 
case_sensitive) {

Review Comment:
   Should we use `std::string_view` for `field_name`? Sometimes field_name will 
be joined with last_path so it is wasteful to convert to string too early.
   
   In addition, `last_path` is better renamed to `prefix`.
   
   The function is actually to create or build a new path, not get.
   
   Putting them together, the signature might be `BuildPath(std::string_view 
prefix, std::string_view field_name, bool case_sensitive)`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to