MisterRaindrop commented on code in PR #127: URL: https://github.com/apache/iceberg-cpp/pull/127#discussion_r2222798801
########## test/avro_schema_test.cc: ########## @@ -1057,4 +1059,366 @@ TEST(AvroSchemaProjectionTest, ProjectDecimalIncompatible) { ASSERT_THAT(projection_result, HasErrorMessage("Cannot read")); } +// NameMapping tests for Avro schema context +class NameMappingAvroSchemaTest : public ::testing::Test { + protected: + // Helper function to create a simple name mapping + std::unique_ptr<NameMapping> CreateSimpleNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2}); + fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3}); + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a nested name mapping + std::unique_ptr<NameMapping> CreateNestedNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for address + std::vector<MappedField> address_fields; + address_fields.emplace_back(MappedField{.names = {"street"}, .field_id = 10}); + address_fields.emplace_back(MappedField{.names = {"city"}, .field_id = 11}); + address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12}); + auto address_mapping = MappedFields::Make(std::move(address_fields)); + + fields.emplace_back(MappedField{.names = {"address"}, + .field_id = 2, + .nested_mapping = std::move(address_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for array types + std::unique_ptr<NameMapping> CreateArrayNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for array element + std::vector<MappedField> element_fields; + element_fields.emplace_back(MappedField{.names = {"element"}, .field_id = 20}); + auto element_mapping = MappedFields::Make(std::move(element_fields)); + + fields.emplace_back(MappedField{ + .names = {"items"}, .field_id = 2, .nested_mapping = std::move(element_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for map types + std::unique_ptr<NameMapping> CreateMapNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for map key-value + std::vector<MappedField> map_fields; + map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30}); + map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31}); + auto map_mapping = MappedFields::Make(std::move(map_fields)); + + fields.emplace_back(MappedField{.names = {"properties"}, + .field_id = 2, + .nested_mapping = std::move(map_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for union types + std::unique_ptr<NameMapping> CreateUnionNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2}); + return NameMapping::Make(std::move(fields)); + } +}; + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) { + // Create a simple Avro record schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "name", "type": "string"}, + {"name": "age", "type": "int"} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateSimpleNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& node = *result; + EXPECT_EQ(node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(node->names(), 3); + EXPECT_EQ(node->leaves(), 3); + + // Check that field IDs are properly applied + ASSERT_EQ(node->customAttributes(), 3); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3)); +} + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) { + // Create a nested Avro record schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "address", "type": { + "type": "record", + "name": "address", + "fields": [ + {"name": "street", "type": "string"}, + {"name": "city", "type": "string"}, + {"name": "zip", "type": "string"} + ] + }} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateNestedNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& node = *result; + EXPECT_EQ(node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(node->names(), 2); + EXPECT_EQ(node->leaves(), 2); + + // Check that field IDs are properly applied to top-level fields + ASSERT_EQ(node->customAttributes(), 2); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2)); + + // Check nested record + const auto& address_node = node->leafAt(1); + EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(address_node->names(), 3); + EXPECT_EQ(address_node->leaves(), 3); + + // Check that field IDs are properly applied to nested fields + ASSERT_EQ(address_node->customAttributes(), 3); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0, /*field_id=*/10)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1, /*field_id=*/11)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2, /*field_id=*/12)); +} + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) { + // Create an Avro array schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "items", "type": { + "type": "array", + "items": "string" + }} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateArrayNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& new_node = *result; + EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(new_node->names(), 2); + EXPECT_EQ(new_node->leaves(), 2); + + // Check that field IDs are properly applied to top-level fields + ASSERT_EQ(new_node->customAttributes(), 2); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1, /*field_id=*/2)); + + // Check array field structure and element field ID + const auto& array_node = new_node->leafAt(1); + EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY); + EXPECT_EQ(array_node->leaves(), 1); + + // Check that array element has field ID applied + const auto& element_node = array_node->leafAt(0); + EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING); Review Comment: I don’t understand this test case. Do we need to test failure scenarios? `CheckFieldIdAt(array_node, /*index=*/0, /*field_id=*/20)` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org