wgtmac commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2295465180
##########
src/iceberg/schema.cc:
##########
@@ -19,12 +19,46 @@
#include "iceberg/schema.h"
+#include <algorithm>
#include <format>
#include "iceberg/type.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
namespace iceberg {
+class IdToFieldVisitor {
Review Comment:
```suggestion
class IdToFieldVisitor {
```
##########
test/schema_test.cc:
##########
@@ -81,3 +81,260 @@ TEST(SchemaTest, Equality) {
ASSERT_EQ(schema1, schema5);
ASSERT_EQ(schema5, schema1);
}
+
+class NestedTypeTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ field1_ = iceberg::SchemaField(1, "Foo", iceberg::int32(), true);
+ field2_ = iceberg::SchemaField(2, "Bar", iceberg::string(), true);
+ field3_ = iceberg::SchemaField(3, "Foobar", iceberg::int32(), true);
+
+ iceberg::StructType structtype =
+ iceberg::StructType(std::vector<iceberg::SchemaField>{
+ field1_.value(), field2_.value(), field3_.value()});
+
+ auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+ 4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+ auto maptype =
+ iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key",
iceberg::int32()),
+ iceberg::SchemaField::MakeRequired(
+ 6, "value",
std::make_shared<iceberg::ListType>(listype)));
+
+ field4_ = iceberg::SchemaField::MakeRequired(
+ 4, "element", std::make_shared<iceberg::StructType>(structtype));
+ field5_ = iceberg::SchemaField::MakeRequired(5, "key", iceberg::int32());
+ field6_ = iceberg::SchemaField::MakeRequired(
+ 6, "value", std::make_shared<iceberg::ListType>(listype));
+ field7_ = iceberg::SchemaField::MakeRequired(
+ 7, "Value", std::make_shared<iceberg::MapType>(maptype));
+
+ schema_ = std::make_shared<iceberg::Schema>(
+ std::vector<iceberg::SchemaField>{field7_.value()}, 1);
+ }
+
+ std::shared_ptr<iceberg::Schema> schema_;
+ std::optional<iceberg::SchemaField> field1_;
+ std::optional<iceberg::SchemaField> field2_;
+ std::optional<iceberg::SchemaField> field3_;
+ std::optional<iceberg::SchemaField> field4_;
+ std::optional<iceberg::SchemaField> field5_;
+ std::optional<iceberg::SchemaField> field6_;
+ std::optional<iceberg::SchemaField> field7_;
+};
Review Comment:
```suggestion
class MapOfListOfStructTest : public ::testing::Test {
protected:
void SetUp() override {
field1_ = iceberg::SchemaField::MakeOptional(1, "Foo", iceberg::int32());
field2_ = iceberg::SchemaField::MakeOptional(2, "Bar",
iceberg::string());
field3_ = iceberg::SchemaField::MakeOptional(3, "Foobar",
iceberg::int32());
auto struct_type = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{field1_, field2_, field3_});
field4_ = iceberg::SchemaField::MakeRequired(4, "element", struct_type);
auto list_type = std::make_shared<iceberg::ListType>(field4_);
field5_ = iceberg::SchemaField::MakeRequired(5, "key", iceberg::int32());
field6_ = iceberg::SchemaField::MakeRequired(6, "value", list_type);
auto map_type = std::make_shared<iceberg::MapType>(field5_, field6_);
field7_ = iceberg::SchemaField::MakeRequired(7, "Value", map_type);
schema_ =
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{field7_});
}
std::shared_ptr<iceberg::Schema> schema_;
iceberg::SchemaField field1_;
iceberg::SchemaField field2_;
iceberg::SchemaField field3_;
iceberg::SchemaField field4_;
iceberg::SchemaField field5_;
iceberg::SchemaField field6_;
iceberg::SchemaField field7_;
};
```
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
Review Comment:
```suggestion
NametoIdVisitor visitor(name_to_id_, /*case_sensitive=*/true);
```
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
+ int32_t field_id) const {
+ ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+ auto it = id_to_field_.find(field_id);
+ if (it == id_to_field_.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
id_to_field)
+ : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+ if (type.is_nested()) {
+ ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+ }
+ return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+ const auto& nested = iceberg::internal::checked_cast<const
NestedType&>(type);
+ const auto& fields = nested.fields();
+ for (const auto& field : fields) {
+ id_to_field_.emplace(field.field_id(), std::cref(field));
+ ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+ }
+ return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>&
name_to_id,
+ bool case_sensitive)
+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& field = type.fields()[0];
+ std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+ std::string new_short_path;
+ if (field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
+ name_to_id_.emplace(new_short_path, field.field_id());
+ ICEBERG_RETURN_UNEXPECTED(
+ VisitTypeInline(*field.type(), this, new_path, new_short_path));
+ return {};
+}
+
+Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
+ const std::string& short_path) {
+ std::string new_path, new_short_path;
+ const auto& fields = type.fields();
+ for (const auto& field : fields) {
+ new_path = BuildPath(path, field.name(), case_sensitive_);
+ if (field.name() == MapType::kValueName &&
+ field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
+ name_to_id_.emplace(new_short_path, field.field_id());
+ ICEBERG_RETURN_UNEXPECTED(
+ VisitTypeInline(*field.type(), this, new_path, new_short_path));
+ }
+ return {};
+}
+
+Status NametoIdVisitor::Visit(const StructType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& fields = type.fields();
+ std::string new_path, new_short_path;
+ for (const auto& field : fields) {
+ new_path = BuildPath(path, field.name(), case_sensitive_);
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ name_to_id_[new_path] = field.field_id();
Review Comment:
```suggestion
name_to_id_.emplace(new_path, field.field_id());
```
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
+ int32_t field_id) const {
+ ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+ auto it = id_to_field_.find(field_id);
+ if (it == id_to_field_.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
id_to_field)
+ : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+ if (type.is_nested()) {
+ ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+ }
+ return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+ const auto& nested = iceberg::internal::checked_cast<const
NestedType&>(type);
+ const auto& fields = nested.fields();
+ for (const auto& field : fields) {
+ id_to_field_.emplace(field.field_id(), std::cref(field));
+ ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+ }
+ return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>&
name_to_id,
+ bool case_sensitive)
+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& field = type.fields()[0];
+ std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+ std::string new_short_path;
+ if (field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
+ name_to_id_.emplace(new_short_path, field.field_id());
+ ICEBERG_RETURN_UNEXPECTED(
+ VisitTypeInline(*field.type(), this, new_path, new_short_path));
+ return {};
+}
+
+Status NametoIdVisitor::Visit(const MapType& type, const std::string& path,
+ const std::string& short_path) {
+ std::string new_path, new_short_path;
+ const auto& fields = type.fields();
+ for (const auto& field : fields) {
+ new_path = BuildPath(path, field.name(), case_sensitive_);
+ if (field.name() == MapType::kValueName &&
+ field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
Review Comment:
```suggestion
name_to_id_.emplace(new_path, field.field_id());
```
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
Review Comment:
```suggestion
NametoIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false);
```
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
+ int32_t field_id) const {
+ ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+ auto it = id_to_field_.find(field_id);
+ if (it == id_to_field_.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
id_to_field)
+ : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+ if (type.is_nested()) {
+ ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+ }
+ return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+ const auto& nested = iceberg::internal::checked_cast<const
NestedType&>(type);
+ const auto& fields = nested.fields();
+ for (const auto& field : fields) {
+ id_to_field_.emplace(field.field_id(), std::cref(field));
+ ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+ }
+ return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>&
name_to_id,
+ bool case_sensitive)
+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& field = type.fields()[0];
+ std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+ std::string new_short_path;
+ if (field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
Review Comment:
IMO, we need to check the return value of `emplace`; otherwise the `new_path` or
`new_short_path` passed to `VisitTypeInline` would be inconsistent. The current
algorithm for adding both the canonical path and the short path should never
produce a conflict, so the insertion should always succeed, right?
##########
src/iceberg/schema.cc:
##########
@@ -44,4 +78,151 @@ bool Schema::Equals(const Schema& other) const {
return schema_id_ == other.schema_id_ && fields_ == other.fields_;
}
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
+ std::string_view name, bool case_sensitive) const {
+ if (case_sensitive) {
+ ICEBERG_RETURN_UNEXPECTED(InitNameToIndexMap());
+ auto it = name_to_id_.find(std::string(name));
+ if (it == name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+ }
+ ICEBERG_RETURN_UNEXPECTED(InitLowerCaseNameToIndexMap());
+ std::string lower_name(name);
+ std::ranges::transform(lower_name, lower_name.begin(), ::tolower);
+ auto it = lowercase_name_to_id_.find(lower_name);
+ if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ return FindFieldById(it->second);
+}
+
+Result<Status> Schema::InitIdToIndexMap() const {
+ if (!id_to_field_.empty()) {
+ return {};
+ }
+ IdToFieldVisitor visitor(id_to_field_);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
+ return {};
+}
+
+Result<Status> Schema::InitNameToIndexMap() const {
+ if (!name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(name_to_id_, true);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<Status> Schema::InitLowerCaseNameToIndexMap() const {
+ if (!lowercase_name_to_id_.empty()) {
+ return {};
+ }
+ std::string path, short_path;
+ NametoIdVisitor visitor(lowercase_name_to_id_, false);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor, path,
short_path));
+ return {};
+}
+
+Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
+ int32_t field_id) const {
+ ICEBERG_RETURN_UNEXPECTED(InitIdToIndexMap());
+ auto it = id_to_field_.find(field_id);
+ if (it == id_to_field_.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
+IdToFieldVisitor::IdToFieldVisitor(
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
id_to_field)
+ : id_to_field_(id_to_field) {}
+
+Status IdToFieldVisitor::Visit(const Type& type) {
+ if (type.is_nested()) {
+ ICEBERG_RETURN_UNEXPECTED(VisitNestedType(type));
+ }
+ return {};
+}
+
+Status IdToFieldVisitor::VisitNestedType(const Type& type) {
+ const auto& nested = iceberg::internal::checked_cast<const
NestedType&>(type);
+ const auto& fields = nested.fields();
+ for (const auto& field : fields) {
+ id_to_field_.emplace(field.field_id(), std::cref(field));
+ ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
+ }
+ return {};
+}
+
+NametoIdVisitor::NametoIdVisitor(std::unordered_map<std::string, int32_t>&
name_to_id,
+ bool case_sensitive)
+ : name_to_id_(name_to_id), case_sensitive_(case_sensitive) {}
+
+Status NametoIdVisitor::Visit(const ListType& type, const std::string& path,
+ const std::string& short_path) {
+ const auto& field = type.fields()[0];
+ std::string new_path = BuildPath(path, field.name(), case_sensitive_);
+ std::string new_short_path;
+ if (field.type()->type_id() == TypeId::kStruct) {
+ new_short_path = short_path;
+ } else {
+ new_short_path = BuildPath(short_path, field.name(), case_sensitive_);
+ }
+ name_to_id_[new_path] = field.field_id();
Review Comment:
```suggestion
name_to_id_.emplace(new_path, field.field_id());
```
Let's use the same style to insert.
##########
test/schema_test.cc:
##########
@@ -81,3 +81,260 @@ TEST(SchemaTest, Equality) {
ASSERT_EQ(schema1, schema5);
ASSERT_EQ(schema5, schema1);
}
+
+class NestedTypeTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ field1_ = iceberg::SchemaField(1, "Foo", iceberg::int32(), true);
+ field2_ = iceberg::SchemaField(2, "Bar", iceberg::string(), true);
+ field3_ = iceberg::SchemaField(3, "Foobar", iceberg::int32(), true);
+
+ iceberg::StructType structtype =
+ iceberg::StructType(std::vector<iceberg::SchemaField>{
+ field1_.value(), field2_.value(), field3_.value()});
+
+ auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+ 4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+ auto maptype =
+ iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key",
iceberg::int32()),
+ iceberg::SchemaField::MakeRequired(
+ 6, "value",
std::make_shared<iceberg::ListType>(listype)));
+
+ field4_ = iceberg::SchemaField::MakeRequired(
+ 4, "element", std::make_shared<iceberg::StructType>(structtype));
+ field5_ = iceberg::SchemaField::MakeRequired(5, "key", iceberg::int32());
+ field6_ = iceberg::SchemaField::MakeRequired(
+ 6, "value", std::make_shared<iceberg::ListType>(listype));
+ field7_ = iceberg::SchemaField::MakeRequired(
+ 7, "Value", std::make_shared<iceberg::MapType>(maptype));
+
+ schema_ = std::make_shared<iceberg::Schema>(
+ std::vector<iceberg::SchemaField>{field7_.value()}, 1);
+ }
+
+ std::shared_ptr<iceberg::Schema> schema_;
+ std::optional<iceberg::SchemaField> field1_;
+ std::optional<iceberg::SchemaField> field2_;
+ std::optional<iceberg::SchemaField> field3_;
+ std::optional<iceberg::SchemaField> field4_;
+ std::optional<iceberg::SchemaField> field5_;
+ std::optional<iceberg::SchemaField> field6_;
+ std::optional<iceberg::SchemaField> field7_;
+};
+
+TEST_F(NestedTypeTest, TestFindById) {
+ ASSERT_THAT(schema_->FindFieldById(7), ::testing::Optional(field7_));
+ ASSERT_THAT(schema_->FindFieldById(6), ::testing::Optional(field6_));
+ ASSERT_THAT(schema_->FindFieldById(5), ::testing::Optional(field5_));
+ ASSERT_THAT(schema_->FindFieldById(4), ::testing::Optional(field4_));
+ ASSERT_THAT(schema_->FindFieldById(3), ::testing::Optional(field3_));
+ ASSERT_THAT(schema_->FindFieldById(2), ::testing::Optional(field2_));
+ ASSERT_THAT(schema_->FindFieldById(1), ::testing::Optional(field1_));
+
+ ASSERT_THAT(schema_->FindFieldById(10), ::testing::Optional(std::nullopt));
+}
+
+TEST_F(NestedTypeTest, TestFindByName) {
+ ASSERT_THAT(schema_->FindFieldByName("Value"), ::testing::Optional(field7_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value"),
::testing::Optional(field6_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.key"),
::testing::Optional(field5_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.element"),
+ ::testing::Optional(field4_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foobar"),
+ ::testing::Optional(field3_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Bar"),
+ ::testing::Optional(field2_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foo"),
+ ::testing::Optional(field1_));
+
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.element.FoO"),
+ ::testing::Optional(std::nullopt));
+}
+
+TEST_F(NestedTypeTest, TestFindByNameCaseInsensitive) {
+ ASSERT_THAT(schema_->FindFieldByName("vALue", false),
::testing::Optional(field7_));
+ ASSERT_THAT(schema_->FindFieldByName("vALue.VALUE", false),
+ ::testing::Optional(field6_));
+ ASSERT_THAT(schema_->FindFieldByName("valUe.kEy", false),
::testing::Optional(field5_));
+ ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.elEment", false),
+ ::testing::Optional(field4_));
+ ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.eLement.fOObar", false),
+ ::testing::Optional(field3_));
+ ASSERT_THAT(schema_->FindFieldByName("valUe.vaLUe.elemEnt.Bar", false),
+ ::testing::Optional(field2_));
+ ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FOO", false),
+ ::testing::Optional(field1_));
+ ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FO", false),
+ ::testing::Optional(std::nullopt));
+}
+
+TEST_F(NestedTypeTest, TestFindByShortNameCaseInsensitive) {
+ ASSERT_THAT(schema_->FindFieldByName("vaLue.value.FOO", false),
+ ::testing::Optional(field1_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.Bar", false),
+ ::testing::Optional(field2_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR", false),
+ ::testing::Optional(field3_));
+ ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR.a", false),
+ ::testing::Optional(std::nullopt));
+}
+
+class NestType2Test : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ field1_ = iceberg::SchemaField(1, "Foo", iceberg::int32(), true);
+ field2_ = iceberg::SchemaField(2, "Bar", iceberg::string(), true);
+ field3_ = iceberg::SchemaField(3, "Foobar", iceberg::int32(), true);
+
+ iceberg::StructType structtype =
+ iceberg::StructType({field1_.value(), field2_.value(),
field3_.value()});
+
+ field4_ = iceberg::SchemaField::MakeRequired(
+ 4, "element", std::make_shared<iceberg::StructType>(structtype));
+ auto listype = iceberg::ListType(field4_.value());
+
+ iceberg::StructType structtype2 = iceberg::StructType(
+ {iceberg::SchemaField::MakeRequired(5, "First_child",
iceberg::int32()),
+ iceberg::SchemaField::MakeRequired(
+ 6, "Second_child",
std::make_shared<iceberg::ListType>(listype))});
+
+ auto maptype = iceberg::MapType(
+ iceberg::SchemaField::MakeRequired(7, "key", iceberg::int32()),
+ iceberg::SchemaField::MakeRequired(
+ 8, "value", std::make_shared<iceberg::StructType>(structtype2)));
+
+ field5_ = iceberg::SchemaField::MakeRequired(5, "First_child",
iceberg::int32());
+ field6_ = iceberg::SchemaField::MakeRequired(
+ 6, "Second_child", std::make_shared<iceberg::ListType>(listype));
+ field7_ = iceberg::SchemaField::MakeRequired(7, "key", iceberg::int32());
+ field8_ = iceberg::SchemaField::MakeRequired(
+ 8, "value", std::make_shared<iceberg::StructType>(structtype2));
+ field9_ = iceberg::SchemaField::MakeRequired(
+ 9, "Map", std::make_shared<iceberg::MapType>(maptype));
+
+ schema_ = std::make_shared<iceberg::Schema>(
+ std::vector<iceberg::SchemaField>{field9_.value()}, 1);
+ }
+
+ std::shared_ptr<iceberg::Schema> schema_;
+ std::optional<iceberg::SchemaField> field1_;
+ std::optional<iceberg::SchemaField> field2_;
+ std::optional<iceberg::SchemaField> field3_;
+ std::optional<iceberg::SchemaField> field4_;
+ std::optional<iceberg::SchemaField> field5_;
+ std::optional<iceberg::SchemaField> field6_;
+ std::optional<iceberg::SchemaField> field7_;
+ std::optional<iceberg::SchemaField> field8_;
+ std::optional<iceberg::SchemaField> field9_;
+};
Review Comment:
```suggestion
class MapOfNestedStructTest : public ::testing::Test {
protected:
void SetUp() override {
field1_ = iceberg::SchemaField::MakeOptional(1, "Foo", iceberg::int32());
field2_ = iceberg::SchemaField::MakeOptional(2, "Bar",
iceberg::string());
field3_ = iceberg::SchemaField::MakeOptional(3, "Foobar",
iceberg::int32());
auto struct_type = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{field1_, field2_, field3_});
field4_ = iceberg::SchemaField::MakeRequired(4, "element", struct_type);
auto list_type = std::make_shared<iceberg::ListType>(field4_);
field5_ = iceberg::SchemaField::MakeRequired(5, "First_child",
iceberg::int32());
field6_ = iceberg::SchemaField::MakeRequired(6, "Second_child",
list_type);
auto struct_type2 = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{field5_, field6_});
field7_ = iceberg::SchemaField::MakeRequired(7, "key", iceberg::int32());
field8_ = iceberg::SchemaField::MakeRequired(8, "value", struct_type2);
auto map_type = std::make_shared<iceberg::MapType>(field7_, field8_);
field9_ = iceberg::SchemaField::MakeRequired(9, "Map", map_type);
schema_ =
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{field9_});
}
std::shared_ptr<iceberg::Schema> schema_;
iceberg::SchemaField field1_;
iceberg::SchemaField field2_;
iceberg::SchemaField field3_;
iceberg::SchemaField field4_;
iceberg::SchemaField field5_;
iceberg::SchemaField field6_;
iceberg::SchemaField field7_;
iceberg::SchemaField field8_;
iceberg::SchemaField field9_;
};
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]