wgtmac commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2284108623


##########
src/iceberg/type.h:
##########
@@ -24,14 +24,20 @@
 /// iceberg/type_fwd.h for the enum defining the list of types.
 
 #include <array>
+#include <cctype>
+#include <cstddef>
 #include <cstdint>
+#include <functional>
 #include <memory>
 #include <optional>
 #include <span>
 #include <string>
+#include <string_view>
 #include <unordered_map>
 #include <vector>
 
+#include <iceberg/expression/expression.h>
+

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/type.h:
##########
@@ -145,7 +150,9 @@ class ICEBERG_EXPORT ListType : public NestedType {
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByIndex(
       int32_t index) const override;
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
-      std::string_view name) const override;
+      std::string_view name, bool case_sensitive) const override;
+  std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
+      std::string_view name) const;

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/type.cc:
##########
@@ -27,6 +27,10 @@
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
 
 namespace iceberg {
+std::optional<std::reference_wrapper<const SchemaField>> 
NestedType::GetFieldByName(
+    std::string_view name) const {
+  return GetFieldByName(name, true);

Review Comment:
   ```suggestion
     return GetFieldByName(name, /*case_sensitive=*/true);
   ```
   
   For better readability.



##########
src/iceberg/schema.cc:
##########
@@ -21,13 +21,15 @@
 
 #include <format>
 
+#include "iceberg/exception.h"
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
-
 namespace iceberg {
 
 Schema::Schema(std::vector<SchemaField> fields, std::optional<int32_t> 
schema_id)
-    : StructType(std::move(fields)), schema_id_(schema_id) {}
+    : StructType(std::move(fields)), schema_id_(schema_id) {
+  InitIdToIndexMap();

Review Comment:
   Can we call it lazily?



##########
src/iceberg/type.h:
##########
@@ -114,7 +117,9 @@ class ICEBERG_EXPORT StructType : public NestedType {
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByIndex(
       int32_t index) const override;
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
-      std::string_view name) const override;
+      std::string_view name, bool case_sensitive) const override;
+  std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
+      std::string_view name) const;

Review Comment:
   It is sufficient to add this to the parent `NestedType` at line 96 above. 
Subclasses will all have it.



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(

Review Comment:
   Add some docstrings to explain what kinds of `name` are supported? 
Especially for nested fields.



##########
src/iceberg/type.cc:
##########
@@ -120,7 +128,7 @@ std::optional<std::reference_wrapper<const SchemaField>> 
ListType::GetFieldByInd
   return std::nullopt;
 }
 std::optional<std::reference_wrapper<const SchemaField>> 
ListType::GetFieldByName(
-    std::string_view name) const {
+    std::string_view name, bool case_sensitive) const {
   if (name == element_.name()) {

Review Comment:
   Do we need to call `std::tolower(name)` if case_sensitive is false?



##########
src/iceberg/type.cc:
##########
@@ -178,7 +190,7 @@ std::optional<std::reference_wrapper<const SchemaField>> 
MapType::GetFieldByInde
   return std::nullopt;
 }
 std::optional<std::reference_wrapper<const SchemaField>> 
MapType::GetFieldByName(
-    std::string_view name) const {
+    std::string_view name, bool case_sensitive) const {
   if (name == kKeyName) {

Review Comment:
   ditto



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(

Review Comment:
   ```suggestion
     std::optional<std::reference_wrapper<const SchemaField>> FindFieldByName(
   ```
   
   I'd suggest using another name to avoid overriding the function with same 
name by accident. Otherwise, it may surprise users that 
`Schema::GetFieldByName` takes effect but users expect the behavior to be 
`StructType::GetFieldByName`.



##########
src/iceberg/type.h:
##########
@@ -114,7 +117,9 @@ class ICEBERG_EXPORT StructType : public NestedType {
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByIndex(
       int32_t index) const override;
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
-      std::string_view name) const override;
+      std::string_view name, bool case_sensitive) const override;
+  std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
+      std::string_view name) const;

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/type.h:
##########
@@ -81,17 +87,44 @@ class ICEBERG_EXPORT NestedType : public Type {
   [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
   GetFieldById(int32_t field_id) const = 0;
   /// \brief Get a field by index.
+

Review Comment:
   Please remove unrelated changes.



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name, bool case_sensitive) const override;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name) const;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldById(

Review Comment:
   ```suggestion
     std::optional<std::reference_wrapper<const SchemaField>> FindFieldById(
   ```



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name, bool case_sensitive) const override;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(

Review Comment:
   ```suggestion
     std::optional<std::reference_wrapper<const SchemaField>> FindFieldByName(
   ```



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name, bool case_sensitive) const override;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name) const;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldById(
+      int32_t field_id) const override;
+
   friend bool operator==(const Schema& lhs, const Schema& rhs) { return 
lhs.Equals(rhs); }
 
+  mutable std::unordered_map<int, size_t> id_to_index_;
+  mutable std::unordered_map<std::string, size_t> name_to_index_;
+  mutable std::unordered_map<std::string, size_t> lowercase_name_to_index_;
+  mutable std::vector<SchemaField> full_schemafield_;
+
  private:
   /// \brief Compare two schemas for equality.
   [[nodiscard]] bool Equals(const Schema& other) const;
 
   const std::optional<int32_t> schema_id_;
+
+  void InitIdToIndexMap() const;
+  void InitNameToIndexMap() const;
+  void InitLowerCaseNameToIndexMap() const;
 };
 
+class SchemaFieldVisitor {

Review Comment:
   Can we declare it in the schema.cc only? This class is not supposed to be 
used by users.



##########
src/iceberg/type.h:
##########
@@ -177,7 +184,9 @@ class ICEBERG_EXPORT MapType : public NestedType {
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByIndex(
       int32_t index) const override;
   std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
-      std::string_view name) const override;
+      std::string_view name, bool case_sensitive) const override;
+  std::optional<std::reference_wrapper<const SchemaField>> GetFieldByName(
+      std::string_view name) const;

Review Comment:
   ```suggestion
   ```



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name, bool case_sensitive) const override;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name) const;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldById(
+      int32_t field_id) const override;
+
   friend bool operator==(const Schema& lhs, const Schema& rhs) { return 
lhs.Equals(rhs); }
 
+  mutable std::unordered_map<int, size_t> id_to_index_;

Review Comment:
   ```suggestion
     /// Mapping from field id to index of `full_schemafield_`.
     mutable std::unordered_map<int32_t, size_t> id_to_index_;
   ```
   
   It would be good to add some comment to help readers understand it since it 
is not obvious.



##########
src/iceberg/schema.h:
##########
@@ -54,13 +56,42 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name, bool case_sensitive) const override;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldByName(
+      std::string_view name) const;
+
+  [[nodiscard]] std::optional<std::reference_wrapper<const SchemaField>> 
GetFieldById(
+      int32_t field_id) const override;
+
   friend bool operator==(const Schema& lhs, const Schema& rhs) { return 
lhs.Equals(rhs); }
 
+  mutable std::unordered_map<int, size_t> id_to_index_;
+  mutable std::unordered_map<std::string, size_t> name_to_index_;
+  mutable std::unordered_map<std::string, size_t> lowercase_name_to_index_;
+  mutable std::vector<SchemaField> full_schemafield_;

Review Comment:
   ```suggestion
     mutable std::vector<std::reference_wrapper<const SchemaField>> 
full_schemafield_;
   ```
   
   We may want to use const reference or pointer to avoid copy anything from 
the SchemaField. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to