wgtmac commented on code in PR #127:
URL: https://github.com/apache/iceberg-cpp/pull/127#discussion_r2249626673
##########
src/iceberg/avro/avro_schema_util.cc:
##########
@@ -773,4 +775,278 @@ Result<SchemaProjection> Project(const Schema&
expected_schema,
return SchemaProjection{std::move(field_projection.children)};
}
+namespace {
+
+Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr&
original_node,
+ const MappedField& field)
{
+ auto new_record_node = std::make_shared<::avro::NodeRecord>();
+ new_record_node->setName(original_node->name());
+
+ for (size_t i = 0; i < original_node->leaves(); ++i) {
+ if (i >= original_node->names()) {
+ return InvalidSchema("Index {} is out of bounds for names (size: {})", i,
+ original_node->names());
+ }
+ const std::string& field_name = original_node->nameAt(i);
+ ::avro::NodePtr field_node = original_node->leafAt(i);
+
+ // TODO(liuxiaoyu): Add support for case sensitivity in name matching.
+ // Try to find nested field by name
+ const MappedField* nested_field = nullptr;
+ if (field.nested_mapping) {
+ auto fields_span = field.nested_mapping->fields();
+ for (const auto& f : fields_span) {
+ if (f.names.find(field_name) != f.names.end()) {
+ nested_field = &f;
+ break;
+ }
+ }
+ }
+
+ if (nested_field) {
+ // Check if field_id is present
+ if (!nested_field->field_id.has_value()) {
+ return InvalidSchema("Field ID is missing for field '{}' in nested
mapping",
+ field_name);
+ }
+
+ // Preserve existing custom attributes for this field
+ ::avro::CustomAttributes attributes;
+ if (i < original_node->customAttributes()) {
+ // Copy all existing attributes from the original node
+ const auto& original_attrs = original_node->customAttributesAt(i);
+ const auto& existing_attrs = original_attrs.attributes();
+ for (const auto& attr_pair : existing_attrs) {
+ // Copy each existing attribute to preserve original metadata
+ attributes.addAttribute(attr_pair.first, attr_pair.second, false);
+ }
+ }
+
+ // Add field ID attribute to the new node (preserving existing
attributes)
+ attributes.addAttribute(std::string(kFieldIdProp),
+ std::to_string(nested_field->field_id.value()),
false);
Review Comment:
```suggestion
std::to_string(nested_field->field_id.value()), /*addQuote=*/false);
```
##########
src/iceberg/avro/avro_schema_util.cc:
##########
@@ -773,4 +775,278 @@ Result<SchemaProjection> Project(const Schema&
expected_schema,
return SchemaProjection{std::move(field_projection.children)};
}
+namespace {
+
+Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr&
original_node,
+ const MappedField& field)
{
+ auto new_record_node = std::make_shared<::avro::NodeRecord>();
+ new_record_node->setName(original_node->name());
+
+ for (size_t i = 0; i < original_node->leaves(); ++i) {
+ if (i >= original_node->names()) {
+ return InvalidSchema("Index {} is out of bounds for names (size: {})", i,
+ original_node->names());
+ }
+ const std::string& field_name = original_node->nameAt(i);
+ ::avro::NodePtr field_node = original_node->leafAt(i);
+
+ // TODO(liuxiaoyu): Add support for case sensitivity in name matching.
+ // Try to find nested field by name
+ const MappedField* nested_field = nullptr;
+ if (field.nested_mapping) {
+ auto fields_span = field.nested_mapping->fields();
+ for (const auto& f : fields_span) {
+ if (f.names.find(field_name) != f.names.end()) {
+ nested_field = &f;
+ break;
+ }
+ }
+ }
+
+ if (nested_field) {
+ // Check if field_id is present
+ if (!nested_field->field_id.has_value()) {
+ return InvalidSchema("Field ID is missing for field '{}' in nested
mapping",
+ field_name);
+ }
+
+ // Preserve existing custom attributes for this field
+ ::avro::CustomAttributes attributes;
+ if (i < original_node->customAttributes()) {
+ // Copy all existing attributes from the original node
+ const auto& original_attrs = original_node->customAttributesAt(i);
+ const auto& existing_attrs = original_attrs.attributes();
+ for (const auto& attr_pair : existing_attrs) {
+ // Copy each existing attribute to preserve original metadata
+ attributes.addAttribute(attr_pair.first, attr_pair.second, false);
+ }
+ }
+
+ // Add field ID attribute to the new node (preserving existing
attributes)
+ attributes.addAttribute(std::string(kFieldIdProp),
+ std::to_string(nested_field->field_id.value()),
false);
+
+ new_record_node->addCustomAttributesForField(attributes);
+
+ // Recursively apply field IDs to nested fields
+ ICEBERG_ASSIGN_OR_RAISE(auto new_nested_node,
+ MakeAvroNodeWithFieldIds(field_node,
*nested_field));
+ new_record_node->addName(field_name);
+ new_record_node->addLeaf(new_nested_node);
+ } else {
+ // If no nested field found, this is an error
+ return InvalidSchema("Field '{}' not found in nested mapping",
field_name);
+ }
+ }
+
+ return new_record_node;
+}
+
+Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr&
original_node,
+ const MappedField& field) {
+ if (original_node->leaves() != 1) {
+ return InvalidSchema("Array type must have exactly one leaf");
+ }
+
+ auto new_array_node = std::make_shared<::avro::NodeArray>();
+
+ // Check if this is a map represented as array
+ if (HasMapLogicalType(original_node)) {
+ ICEBERG_ASSIGN_OR_RAISE(auto new_element_node,
+ MakeAvroNodeWithFieldIds(original_node->leafAt(0),
field));
+ new_array_node->addLeaf(new_element_node);
+ return new_array_node;
+ }
+
+ // For regular arrays, try to find element field ID from nested mapping
+ const MappedField* element_field = nullptr;
+ if (field.nested_mapping) {
+ auto fields_span = field.nested_mapping->fields();
+ for (const auto& f : fields_span) {
+ if (f.names.find(std::string(kElement)) != f.names.end()) {
+ element_field = &f;
+ break;
+ }
+ }
+ }
+
+ if (element_field) {
+ // Check if field_id is present
+ if (!element_field->field_id.has_value()) {
+ return InvalidSchema("Field ID is missing for element field in array");
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto new_element_node,
+ MakeAvroNodeWithFieldIds(original_node->leafAt(0), *element_field));
+ new_array_node->addLeaf(new_element_node);
+
+ // Add element field ID as custom attribute
+ ::avro::CustomAttributes element_attributes;
+ element_attributes.addAttribute(std::string(kFieldIdProp),
+ std::to_string(*element_field->field_id),
false);
Review Comment:
```suggestion
std::to_string(*element_field->field_id), /*addQuote=*/false);
```
##########
src/iceberg/avro/avro_schema_util.cc:
##########
@@ -783,4 +785,237 @@ Result<SchemaProjection> Project(const Schema&
expected_schema,
return SchemaProjection{std::move(field_projection.children)};
}
+namespace {
+
+Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr&
original_node,
+ const MappedField& field)
{
+ auto new_record_node = std::make_shared<::avro::NodeRecord>();
+ new_record_node->setName(original_node->name());
+
+ if (original_node->leaves() > original_node->names()) {
+ return InvalidSchema("Node has {} leaves but only {} names",
original_node->leaves(),
+ original_node->names());
+ }
+
+ for (size_t i = 0; i < original_node->leaves(); ++i) {
+ const std::string& field_name = original_node->nameAt(i);
+ ::avro::NodePtr field_node = original_node->leafAt(i);
+
+ // TODO(liuxiaoyu): Add support for case sensitivity in name matching.
+ // Try to find nested field by name
+ const MappedField* nested_field = nullptr;
+ if (field.nested_mapping) {
+ auto fields_span = field.nested_mapping->fields();
+ for (const auto& f : fields_span) {
+ if (f.names.find(field_name) != f.names.end()) {
+ nested_field = &f;
+ break;
+ }
+ }
+ }
+
+ if (nested_field) {
+ // Check if field_id is present
+ if (!nested_field->field_id.has_value()) {
+ return InvalidSchema("Field ID is missing for field '{}' in nested
mapping",
+ field_name);
+ }
+
+ // Preserve existing custom attributes for this field
+ ::avro::CustomAttributes attributes;
+ if (i < original_node->customAttributes()) {
+ // Copy all existing attributes from the original node
+ const auto& original_attrs = original_node->customAttributesAt(i);
+ const auto& existing_attrs = original_attrs.attributes();
+ for (const auto& attr_pair : existing_attrs) {
Review Comment:
I'd prefer to apply this suggestion to make it shorter.
##########
src/iceberg/avro/avro_schema_util.cc:
##########
@@ -783,4 +785,237 @@ Result<SchemaProjection> Project(const Schema&
expected_schema,
return SchemaProjection{std::move(field_projection.children)};
}
+namespace {
+
+Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr&
original_node,
+ const MappedField& field)
{
+ auto new_record_node = std::make_shared<::avro::NodeRecord>();
+ new_record_node->setName(original_node->name());
+
+ if (original_node->leaves() > original_node->names()) {
+ return InvalidSchema("Node has {} leaves but only {} names",
original_node->leaves(),
+ original_node->names());
+ }
+
+ for (size_t i = 0; i < original_node->leaves(); ++i) {
+ const std::string& field_name = original_node->nameAt(i);
+ ::avro::NodePtr field_node = original_node->leafAt(i);
+
+ // TODO(liuxiaoyu): Add support for case sensitivity in name matching.
+ // Try to find nested field by name
+ const MappedField* nested_field = nullptr;
+ if (field.nested_mapping) {
+ auto fields_span = field.nested_mapping->fields();
+ for (const auto& f : fields_span) {
+ if (f.names.find(field_name) != f.names.end()) {
+ nested_field = &f;
+ break;
+ }
+ }
+ }
+
+ if (nested_field) {
+ // Check if field_id is present
+ if (!nested_field->field_id.has_value()) {
+ return InvalidSchema("Field ID is missing for field '{}' in nested
mapping",
+ field_name);
+ }
+
+ // Preserve existing custom attributes for this field
+ ::avro::CustomAttributes attributes;
+ if (i < original_node->customAttributes()) {
+ // Copy all existing attributes from the original node
+ const auto& original_attrs = original_node->customAttributesAt(i);
+ const auto& existing_attrs = original_attrs.attributes();
+ for (const auto& attr_pair : existing_attrs) {
+ attributes.addAttribute(attr_pair.first, attr_pair.second, false);
Review Comment:
Could you please resolve this?
##########
test/avro_schema_test.cc:
##########
@@ -1057,4 +1059,454 @@ TEST(AvroSchemaProjectionTest,
ProjectDecimalIncompatible) {
ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
}
+// NameMapping tests for Avro schema context
+class NameMappingAvroSchemaTest : public ::testing::Test {
+ protected:
+ // Helper function to create a simple name mapping
+ std::unique_ptr<NameMapping> CreateSimpleNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
+ fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a nested name mapping
+ std::unique_ptr<NameMapping> CreateNestedNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for address
+ std::vector<MappedField> address_fields;
+ address_fields.emplace_back(MappedField{.names = {"street"}, .field_id =
10});
+ address_fields.emplace_back(MappedField{.names = {"city"}, .field_id =
11});
+ address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12});
+ auto address_mapping = MappedFields::Make(std::move(address_fields));
+
+ fields.emplace_back(MappedField{.names = {"address"},
+ .field_id = 2,
+ .nested_mapping =
std::move(address_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for array types
+ std::unique_ptr<NameMapping> CreateArrayNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"element"}, .field_id =
20});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"items"}, .field_id = 2, .nested_mapping =
std::move(element_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for map types
+ std::unique_ptr<NameMapping> CreateMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for map key-value
+ std::vector<MappedField> map_fields;
+ map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30});
+ map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31});
+ auto map_mapping = MappedFields::Make(std::move(map_fields));
+
+ fields.emplace_back(MappedField{.names = {"properties"},
+ .field_id = 2,
+ .nested_mapping = std::move(map_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for union types
+ std::unique_ptr<NameMapping> CreateUnionNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for complex map types
+ // (array<struct<key,value>>)
+ std::unique_ptr<NameMapping> CreateComplexMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element (struct<key,value>)
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 40});
+ element_fields.emplace_back(MappedField{.names = {"value"}, .field_id =
41});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ // Nested mapping for array
+ std::vector<MappedField> array_fields;
+ array_fields.emplace_back(MappedField{.names = {"element"},
+ .field_id = 50,
+ .nested_mapping =
std::move(element_mapping)});
+ auto array_mapping = MappedFields::Make(std::move(array_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"entries"}, .field_id = 2, .nested_mapping =
std::move(array_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+};
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
+ // Create a simple Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "age", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateSimpleNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 3);
+ EXPECT_EQ(node->leaves(), 3);
+
+ // Check that field IDs are properly applied
+ ASSERT_EQ(node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
+ // Create a nested Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "address", "type": {
+ "type": "record",
+ "name": "address",
+ "fields": [
+ {"name": "street", "type": "string"},
+ {"name": "city", "type": "string"},
+ {"name": "zip", "type": "string"}
+ ]
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateNestedNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 2);
+ EXPECT_EQ(node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+
+ // Check nested record
+ const auto& address_node = node->leafAt(1);
+ EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(address_node->names(), 3);
+ EXPECT_EQ(address_node->leaves(), 3);
+
+ // Check that field IDs are properly applied to nested fields
+ ASSERT_EQ(address_node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0,
/*field_id=*/10));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1,
/*field_id=*/11));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2,
/*field_id=*/12));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
+ // Create an Avro array schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "items", "type": {
+ "type": "array",
+ "items": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateArrayNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure and element field ID
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check that array element has field ID applied
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0,
/*field_id=*/20));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
+ // Create an Avro map schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "properties", "type": {
+ "type": "map",
+ "values": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check map field structure and key-value field IDs
+ const auto& map_node = new_node->leafAt(1);
+ EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
+ ASSERT_GE(map_node->leaves(), 2);
+ EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
+ EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+ ASSERT_EQ(map_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0,
/*field_id=*/30));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1,
/*field_id=*/31));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
+ // Create an Avro schema for complex map (array<struct<key,value>>) without
field IDs
+ // This represents a map where key is not a string type
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "entries", "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "entry",
+ "fields": [
+ {"name": "key", "type": "int"},
+ {"name": "value", "type": "string"}
+ ]
+ }
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateComplexMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure (representing the map)
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check array element (struct<key,value>)
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(element_node->names(), 2);
+ EXPECT_EQ(element_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to struct fields
+ ASSERT_EQ(element_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/0,
/*field_id=*/40));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/1,
/*field_id=*/41));
+
+ // Check key and value types
+ EXPECT_EQ(element_node->leafAt(0)->type(), ::avro::AVRO_INT);
+ EXPECT_EQ(element_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
+ // Create an Avro union schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "data", "type": ["null", "string"]}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateUnionNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check union field
+ const auto& union_node = new_node->leafAt(1);
+ EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
+ EXPECT_EQ(union_node->leaves(), 2);
+
+ // Check that the non-null branch has field ID applied
Review Comment:
```suggestion
```
##########
test/avro_schema_test.cc:
##########
@@ -1057,4 +1059,454 @@ TEST(AvroSchemaProjectionTest,
ProjectDecimalIncompatible) {
ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
}
+// NameMapping tests for Avro schema context
+class NameMappingAvroSchemaTest : public ::testing::Test {
+ protected:
+ // Helper function to create a simple name mapping
+ std::unique_ptr<NameMapping> CreateSimpleNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
+ fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a nested name mapping
+ std::unique_ptr<NameMapping> CreateNestedNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for address
+ std::vector<MappedField> address_fields;
+ address_fields.emplace_back(MappedField{.names = {"street"}, .field_id =
10});
+ address_fields.emplace_back(MappedField{.names = {"city"}, .field_id =
11});
+ address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12});
+ auto address_mapping = MappedFields::Make(std::move(address_fields));
+
+ fields.emplace_back(MappedField{.names = {"address"},
+ .field_id = 2,
+ .nested_mapping =
std::move(address_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for array types
+ std::unique_ptr<NameMapping> CreateArrayNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"element"}, .field_id =
20});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"items"}, .field_id = 2, .nested_mapping =
std::move(element_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for map types
+ std::unique_ptr<NameMapping> CreateMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for map key-value
+ std::vector<MappedField> map_fields;
+ map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30});
+ map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31});
+ auto map_mapping = MappedFields::Make(std::move(map_fields));
+
+ fields.emplace_back(MappedField{.names = {"properties"},
+ .field_id = 2,
+ .nested_mapping = std::move(map_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for union types
+ std::unique_ptr<NameMapping> CreateUnionNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for complex map types
+ // (array<struct<key,value>>)
+ std::unique_ptr<NameMapping> CreateComplexMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element (struct<key,value>)
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 40});
+ element_fields.emplace_back(MappedField{.names = {"value"}, .field_id =
41});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ // Nested mapping for array
+ std::vector<MappedField> array_fields;
+ array_fields.emplace_back(MappedField{.names = {"element"},
+ .field_id = 50,
+ .nested_mapping =
std::move(element_mapping)});
+ auto array_mapping = MappedFields::Make(std::move(array_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"entries"}, .field_id = 2, .nested_mapping =
std::move(array_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+};
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
+ // Create a simple Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "age", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateSimpleNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 3);
+ EXPECT_EQ(node->leaves(), 3);
+
+ // Check that field IDs are properly applied
+ ASSERT_EQ(node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
+ // Create a nested Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "address", "type": {
+ "type": "record",
+ "name": "address",
+ "fields": [
+ {"name": "street", "type": "string"},
+ {"name": "city", "type": "string"},
+ {"name": "zip", "type": "string"}
+ ]
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateNestedNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 2);
+ EXPECT_EQ(node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+
+ // Check nested record
+ const auto& address_node = node->leafAt(1);
+ EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(address_node->names(), 3);
+ EXPECT_EQ(address_node->leaves(), 3);
+
+ // Check that field IDs are properly applied to nested fields
+ ASSERT_EQ(address_node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0,
/*field_id=*/10));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1,
/*field_id=*/11));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2,
/*field_id=*/12));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
+ // Create an Avro array schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "items", "type": {
+ "type": "array",
+ "items": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateArrayNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure and element field ID
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check that array element has field ID applied
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0,
/*field_id=*/20));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
+ // Create an Avro map schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "properties", "type": {
+ "type": "map",
+ "values": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check map field structure and key-value field IDs
+ const auto& map_node = new_node->leafAt(1);
+ EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
+ ASSERT_GE(map_node->leaves(), 2);
+ EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
+ EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+ ASSERT_EQ(map_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0,
/*field_id=*/30));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1,
/*field_id=*/31));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
+ // Create an Avro schema for complex map (array<struct<key,value>>) without
field IDs
+ // This represents a map where key is not a string type
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "entries", "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "entry",
+ "fields": [
+ {"name": "key", "type": "int"},
+ {"name": "value", "type": "string"}
+ ]
+ }
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateComplexMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure (representing the map)
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check array element (struct<key,value>)
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(element_node->names(), 2);
+ EXPECT_EQ(element_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to struct fields
+ ASSERT_EQ(element_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/0,
/*field_id=*/40));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/1,
/*field_id=*/41));
+
+ // Check key and value types
+ EXPECT_EQ(element_node->leafAt(0)->type(), ::avro::AVRO_INT);
+ EXPECT_EQ(element_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
+ // Create an Avro union schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "data", "type": ["null", "string"]}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateUnionNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check union field
+ const auto& union_node = new_node->leafAt(1);
+ EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
+ EXPECT_EQ(union_node->leaves(), 2);
+
+ // Check that the non-null branch has field ID applied
+ const auto& non_null_branch = union_node->leafAt(1);
+ EXPECT_EQ(non_null_branch->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
+ // Create a name mapping with missing field ID
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}}); // Missing field_id
+ auto name_mapping = NameMapping::Make(std::move(fields));
+
+ // Create a simple Avro record schema
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(result,
+ HasErrorMessage("Field ID is missing for field 'name' in nested
mapping"));
Review Comment:
TBH, I don't think this is error because it is acceptable to have some
fields without field-ids after applying name mappings.
##########
test/avro_schema_test.cc:
##########
@@ -1057,4 +1059,454 @@ TEST(AvroSchemaProjectionTest,
ProjectDecimalIncompatible) {
ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
}
+// NameMapping tests for Avro schema context
+class NameMappingAvroSchemaTest : public ::testing::Test {
+ protected:
+ // Helper function to create a simple name mapping
+ std::unique_ptr<NameMapping> CreateSimpleNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
+ fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a nested name mapping
+ std::unique_ptr<NameMapping> CreateNestedNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for address
+ std::vector<MappedField> address_fields;
+ address_fields.emplace_back(MappedField{.names = {"street"}, .field_id =
10});
+ address_fields.emplace_back(MappedField{.names = {"city"}, .field_id =
11});
+ address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12});
+ auto address_mapping = MappedFields::Make(std::move(address_fields));
+
+ fields.emplace_back(MappedField{.names = {"address"},
+ .field_id = 2,
+ .nested_mapping =
std::move(address_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for array types
+ std::unique_ptr<NameMapping> CreateArrayNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"element"}, .field_id =
20});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"items"}, .field_id = 2, .nested_mapping =
std::move(element_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for map types
+ std::unique_ptr<NameMapping> CreateMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for map key-value
+ std::vector<MappedField> map_fields;
+ map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30});
+ map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31});
+ auto map_mapping = MappedFields::Make(std::move(map_fields));
+
+ fields.emplace_back(MappedField{.names = {"properties"},
+ .field_id = 2,
+ .nested_mapping = std::move(map_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for union types
+ std::unique_ptr<NameMapping> CreateUnionNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for complex map types
+ // (array<struct<key,value>>)
+ std::unique_ptr<NameMapping> CreateComplexMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element (struct<key,value>)
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 40});
+ element_fields.emplace_back(MappedField{.names = {"value"}, .field_id =
41});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ // Nested mapping for array
+ std::vector<MappedField> array_fields;
+ array_fields.emplace_back(MappedField{.names = {"element"},
+ .field_id = 50,
+ .nested_mapping =
std::move(element_mapping)});
+ auto array_mapping = MappedFields::Make(std::move(array_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"entries"}, .field_id = 2, .nested_mapping =
std::move(array_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+};
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
+ // Create a simple Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "age", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateSimpleNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 3);
+ EXPECT_EQ(node->leaves(), 3);
+
+ // Check that field IDs are properly applied
+ ASSERT_EQ(node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
+ // Create a nested Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "address", "type": {
+ "type": "record",
+ "name": "address",
+ "fields": [
+ {"name": "street", "type": "string"},
+ {"name": "city", "type": "string"},
+ {"name": "zip", "type": "string"}
+ ]
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateNestedNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 2);
+ EXPECT_EQ(node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+
+ // Check nested record
+ const auto& address_node = node->leafAt(1);
+ EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(address_node->names(), 3);
+ EXPECT_EQ(address_node->leaves(), 3);
+
+ // Check that field IDs are properly applied to nested fields
+ ASSERT_EQ(address_node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0,
/*field_id=*/10));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1,
/*field_id=*/11));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2,
/*field_id=*/12));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
+ // Create an Avro array schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "items", "type": {
+ "type": "array",
+ "items": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateArrayNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure and element field ID
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check that array element has field ID applied
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0,
/*field_id=*/20));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
+ // Create an Avro map schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "properties", "type": {
+ "type": "map",
+ "values": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check map field structure and key-value field IDs
+ const auto& map_node = new_node->leafAt(1);
+ EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
+ ASSERT_GE(map_node->leaves(), 2);
+ EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
+ EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+ ASSERT_EQ(map_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0,
/*field_id=*/30));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1,
/*field_id=*/31));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
+ // Create an Avro schema for complex map (array<struct<key,value>>) without
field IDs
+ // This represents a map where key is not a string type
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "entries", "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "entry",
+ "fields": [
+ {"name": "key", "type": "int"},
+ {"name": "value", "type": "string"}
+ ]
+ }
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateComplexMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure (representing the map)
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check array element (struct<key,value>)
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(element_node->names(), 2);
+ EXPECT_EQ(element_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to struct fields
+ ASSERT_EQ(element_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/0,
/*field_id=*/40));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/1,
/*field_id=*/41));
+
+ // Check key and value types
+ EXPECT_EQ(element_node->leafAt(0)->type(), ::avro::AVRO_INT);
+ EXPECT_EQ(element_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
+ // Create an Avro union schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "data", "type": ["null", "string"]}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateUnionNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check union field
+ const auto& union_node = new_node->leafAt(1);
+ EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
+ EXPECT_EQ(union_node->leaves(), 2);
+
+ // Check that the non-null branch has field ID applied
+ const auto& non_null_branch = union_node->leafAt(1);
+ EXPECT_EQ(non_null_branch->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
+ // Create a name mapping with missing field ID
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}}); // Missing field_id
+ auto name_mapping = NameMapping::Make(std::move(fields));
+
+ // Create a simple Avro record schema
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(result,
+ HasErrorMessage("Field ID is missing for field 'name' in nested
mapping"));
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingFieldError) {
+ // Create a name mapping
+ auto name_mapping = CreateSimpleNameMapping();
+
+ // Create an Avro record schema with a field not in the mapping
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "unknown_field", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(result,
+ HasErrorMessage("Field 'unknown_field' not found in nested
mapping"));
Review Comment:
Same as above.
##########
test/avro_schema_test.cc:
##########
@@ -1057,4 +1059,454 @@ TEST(AvroSchemaProjectionTest,
ProjectDecimalIncompatible) {
ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
}
+// NameMapping tests for Avro schema context
+class NameMappingAvroSchemaTest : public ::testing::Test {
+ protected:
+ // Helper function to create a simple name mapping
+ std::unique_ptr<NameMapping> CreateSimpleNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
+ fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a nested name mapping
+ std::unique_ptr<NameMapping> CreateNestedNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for address
+ std::vector<MappedField> address_fields;
+ address_fields.emplace_back(MappedField{.names = {"street"}, .field_id =
10});
+ address_fields.emplace_back(MappedField{.names = {"city"}, .field_id =
11});
+ address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12});
+ auto address_mapping = MappedFields::Make(std::move(address_fields));
+
+ fields.emplace_back(MappedField{.names = {"address"},
+ .field_id = 2,
+ .nested_mapping =
std::move(address_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for array types
+ std::unique_ptr<NameMapping> CreateArrayNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"element"}, .field_id =
20});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"items"}, .field_id = 2, .nested_mapping =
std::move(element_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for map types
+ std::unique_ptr<NameMapping> CreateMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for map key-value
+ std::vector<MappedField> map_fields;
+ map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30});
+ map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31});
+ auto map_mapping = MappedFields::Make(std::move(map_fields));
+
+ fields.emplace_back(MappedField{.names = {"properties"},
+ .field_id = 2,
+ .nested_mapping = std::move(map_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for union types
+ std::unique_ptr<NameMapping> CreateUnionNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for complex map types
+ // (array<struct<key,value>>)
+ std::unique_ptr<NameMapping> CreateComplexMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element (struct<key,value>)
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 40});
+ element_fields.emplace_back(MappedField{.names = {"value"}, .field_id =
41});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ // Nested mapping for array
+ std::vector<MappedField> array_fields;
+ array_fields.emplace_back(MappedField{.names = {"element"},
+ .field_id = 50,
+ .nested_mapping =
std::move(element_mapping)});
+ auto array_mapping = MappedFields::Make(std::move(array_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"entries"}, .field_id = 2, .nested_mapping =
std::move(array_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+};
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
+ // Create a simple Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "age", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateSimpleNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 3);
+ EXPECT_EQ(node->leaves(), 3);
+
+ // Check that field IDs are properly applied
+ ASSERT_EQ(node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
+ // Create a nested Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "address", "type": {
+ "type": "record",
+ "name": "address",
+ "fields": [
+ {"name": "street", "type": "string"},
+ {"name": "city", "type": "string"},
+ {"name": "zip", "type": "string"}
+ ]
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateNestedNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 2);
+ EXPECT_EQ(node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+
+ // Check nested record
+ const auto& address_node = node->leafAt(1);
+ EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(address_node->names(), 3);
+ EXPECT_EQ(address_node->leaves(), 3);
+
+ // Check that field IDs are properly applied to nested fields
+ ASSERT_EQ(address_node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0,
/*field_id=*/10));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1,
/*field_id=*/11));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2,
/*field_id=*/12));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
+ // Create an Avro array schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "items", "type": {
+ "type": "array",
+ "items": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateArrayNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure and element field ID
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check that array element has field ID applied
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0,
/*field_id=*/20));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
+ // Create an Avro map schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "properties", "type": {
+ "type": "map",
+ "values": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check map field structure and key-value field IDs
+ const auto& map_node = new_node->leafAt(1);
+ EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
+ ASSERT_GE(map_node->leaves(), 2);
+ EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
+ EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+ ASSERT_EQ(map_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0,
/*field_id=*/30));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1,
/*field_id=*/31));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
+ // Create an Avro schema for complex map (array<struct<key,value>>) without
field IDs
+ // This represents a map where key is not a string type
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "entries", "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "entry",
+ "fields": [
+ {"name": "key", "type": "int"},
+ {"name": "value", "type": "string"}
+ ]
+ }
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateComplexMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure (representing the map)
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check array element (struct<key,value>)
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(element_node->names(), 2);
+ EXPECT_EQ(element_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to struct fields
+ ASSERT_EQ(element_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/0,
/*field_id=*/40));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/1,
/*field_id=*/41));
+
+ // Check key and value types
+ EXPECT_EQ(element_node->leafAt(0)->type(), ::avro::AVRO_INT);
+ EXPECT_EQ(element_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
+ // Create an Avro union schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "data", "type": ["null", "string"]}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateUnionNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check union field
+ const auto& union_node = new_node->leafAt(1);
+ EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
+ EXPECT_EQ(union_node->leaves(), 2);
+
+ // Check that the non-null branch has field ID applied
+ const auto& non_null_branch = union_node->leafAt(1);
+ EXPECT_EQ(non_null_branch->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
+ // Create a name mapping with missing field ID
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}}); // Missing field_id
+ auto name_mapping = NameMapping::Make(std::move(fields));
+
+ // Create a simple Avro record schema
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(result,
+ HasErrorMessage("Field ID is missing for field 'name' in nested
mapping"));
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingFieldError) {
+ // Create a name mapping
+ auto name_mapping = CreateSimpleNameMapping();
+
+ // Create an Avro record schema with a field not in the mapping
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "unknown_field", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(result,
+ HasErrorMessage("Field 'unknown_field' not found in nested
mapping"));
+}
+
+TEST_F(NameMappingAvroSchemaTest, MissingArrayElementError) {
Review Comment:
Should we remove `MissingArrayElementError` and `MissingMapKeyValueError`?
They are actually testing the top-level fields instead of specific array or map
type. So I believe these cases are duplicates of the above one.
##########
test/avro_schema_test.cc:
##########
@@ -1057,4 +1059,454 @@ TEST(AvroSchemaProjectionTest,
ProjectDecimalIncompatible) {
ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
}
+// NameMapping tests for Avro schema context
+class NameMappingAvroSchemaTest : public ::testing::Test {
+ protected:
+ // Helper function to create a simple name mapping
+ std::unique_ptr<NameMapping> CreateSimpleNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2});
+ fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a nested name mapping
+ std::unique_ptr<NameMapping> CreateNestedNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for address
+ std::vector<MappedField> address_fields;
+ address_fields.emplace_back(MappedField{.names = {"street"}, .field_id =
10});
+ address_fields.emplace_back(MappedField{.names = {"city"}, .field_id =
11});
+ address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12});
+ auto address_mapping = MappedFields::Make(std::move(address_fields));
+
+ fields.emplace_back(MappedField{.names = {"address"},
+ .field_id = 2,
+ .nested_mapping =
std::move(address_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for array types
+ std::unique_ptr<NameMapping> CreateArrayNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"element"}, .field_id =
20});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"items"}, .field_id = 2, .nested_mapping =
std::move(element_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for map types
+ std::unique_ptr<NameMapping> CreateMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for map key-value
+ std::vector<MappedField> map_fields;
+ map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30});
+ map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31});
+ auto map_mapping = MappedFields::Make(std::move(map_fields));
+
+ fields.emplace_back(MappedField{.names = {"properties"},
+ .field_id = 2,
+ .nested_mapping = std::move(map_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for union types
+ std::unique_ptr<NameMapping> CreateUnionNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+ fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2});
+ return NameMapping::Make(std::move(fields));
+ }
+
+ // Helper function to create a name mapping for complex map types
+ // (array<struct<key,value>>)
+ std::unique_ptr<NameMapping> CreateComplexMapNameMapping() {
+ std::vector<MappedField> fields;
+ fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
+
+ // Nested mapping for array element (struct<key,value>)
+ std::vector<MappedField> element_fields;
+ element_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 40});
+ element_fields.emplace_back(MappedField{.names = {"value"}, .field_id =
41});
+ auto element_mapping = MappedFields::Make(std::move(element_fields));
+
+ // Nested mapping for array
+ std::vector<MappedField> array_fields;
+ array_fields.emplace_back(MappedField{.names = {"element"},
+ .field_id = 50,
+ .nested_mapping =
std::move(element_mapping)});
+ auto array_mapping = MappedFields::Make(std::move(array_fields));
+
+ fields.emplace_back(MappedField{
+ .names = {"entries"}, .field_id = 2, .nested_mapping =
std::move(array_mapping)});
+
+ return NameMapping::Make(std::move(fields));
+ }
+};
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) {
+ // Create a simple Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "name", "type": "string"},
+ {"name": "age", "type": "int"}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateSimpleNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 3);
+ EXPECT_EQ(node->leaves(), 3);
+
+ // Check that field IDs are properly applied
+ ASSERT_EQ(node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) {
+ // Create a nested Avro record schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "address", "type": {
+ "type": "record",
+ "name": "address",
+ "fields": [
+ {"name": "street", "type": "string"},
+ {"name": "city", "type": "string"},
+ {"name": "zip", "type": "string"}
+ ]
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateNestedNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& node = *result;
+ EXPECT_EQ(node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(node->names(), 2);
+ EXPECT_EQ(node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2));
+
+ // Check nested record
+ const auto& address_node = node->leafAt(1);
+ EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(address_node->names(), 3);
+ EXPECT_EQ(address_node->leaves(), 3);
+
+ // Check that field IDs are properly applied to nested fields
+ ASSERT_EQ(address_node->customAttributes(), 3);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0,
/*field_id=*/10));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1,
/*field_id=*/11));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2,
/*field_id=*/12));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) {
+ // Create an Avro array schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "items", "type": {
+ "type": "array",
+ "items": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateArrayNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure and element field ID
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check that array element has field ID applied
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(array_node, /*index=*/0,
/*field_id=*/20));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToMap) {
+ // Create an Avro map schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "properties", "type": {
+ "type": "map",
+ "values": "string"
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check map field structure and key-value field IDs
+ const auto& map_node = new_node->leafAt(1);
+ EXPECT_EQ(map_node->type(), ::avro::AVRO_MAP);
+ ASSERT_GE(map_node->leaves(), 2);
+ EXPECT_EQ(map_node->leafAt(0)->type(), ::avro::AVRO_STRING);
+ EXPECT_EQ(map_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+ ASSERT_EQ(map_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/0,
/*field_id=*/30));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(map_node, /*index=*/1,
/*field_id=*/31));
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToComplexMap) {
+ // Create an Avro schema for complex map (array<struct<key,value>>) without
field IDs
+ // This represents a map where key is not a string type
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "entries", "type": {
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "entry",
+ "fields": [
+ {"name": "key", "type": "int"},
+ {"name": "value", "type": "string"}
+ ]
+ }
+ }}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateComplexMapNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check array field structure (representing the map)
+ const auto& array_node = new_node->leafAt(1);
+ EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY);
+ EXPECT_EQ(array_node->leaves(), 1);
+
+ // Check array element (struct<key,value>)
+ const auto& element_node = array_node->leafAt(0);
+ EXPECT_EQ(element_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(element_node->names(), 2);
+ EXPECT_EQ(element_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to struct fields
+ ASSERT_EQ(element_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/0,
/*field_id=*/40));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(element_node, /*index=*/1,
/*field_id=*/41));
+
+ // Check key and value types
+ EXPECT_EQ(element_node->leafAt(0)->type(), ::avro::AVRO_INT);
+ EXPECT_EQ(element_node->leafAt(1)->type(), ::avro::AVRO_STRING);
+}
+
+TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
+ // Create an Avro union schema without field IDs
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "test_record",
+ "fields": [
+ {"name": "id", "type": "int"},
+ {"name": "data", "type": ["null", "string"]}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto name_mapping = CreateUnionNameMapping();
+
+ auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
+ ASSERT_THAT(result, IsOk());
+
+ const auto& new_node = *result;
+ EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD);
+ EXPECT_EQ(new_node->names(), 2);
+ EXPECT_EQ(new_node->leaves(), 2);
+
+ // Check that field IDs are properly applied to top-level fields
+ ASSERT_EQ(new_node->customAttributes(), 2);
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0,
/*field_id=*/1));
+ ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1,
/*field_id=*/2));
+
+ // Check union field
+ const auto& union_node = new_node->leafAt(1);
+ EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
+ EXPECT_EQ(union_node->leaves(), 2);
+
+ // Check that the non-null branch has field ID applied
Review Comment:
Remove this as it is confusing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]