wgtmac commented on code in PR #127: URL: https://github.com/apache/iceberg-cpp/pull/127#discussion_r2223156940
########## test/avro_schema_test.cc: ########## @@ -1057,4 +1059,366 @@ TEST(AvroSchemaProjectionTest, ProjectDecimalIncompatible) { ASSERT_THAT(projection_result, HasErrorMessage("Cannot read")); } +// NameMapping tests for Avro schema context +class NameMappingAvroSchemaTest : public ::testing::Test { + protected: + // Helper function to create a simple name mapping + std::unique_ptr<NameMapping> CreateSimpleNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + fields.emplace_back(MappedField{.names = {"name"}, .field_id = 2}); + fields.emplace_back(MappedField{.names = {"age"}, .field_id = 3}); + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a nested name mapping + std::unique_ptr<NameMapping> CreateNestedNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for address + std::vector<MappedField> address_fields; + address_fields.emplace_back(MappedField{.names = {"street"}, .field_id = 10}); + address_fields.emplace_back(MappedField{.names = {"city"}, .field_id = 11}); + address_fields.emplace_back(MappedField{.names = {"zip"}, .field_id = 12}); + auto address_mapping = MappedFields::Make(std::move(address_fields)); + + fields.emplace_back(MappedField{.names = {"address"}, + .field_id = 2, + .nested_mapping = std::move(address_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for array types + std::unique_ptr<NameMapping> CreateArrayNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for array element + std::vector<MappedField> element_fields; + element_fields.emplace_back(MappedField{.names = {"element"}, .field_id = 20}); + auto element_mapping = MappedFields::Make(std::move(element_fields)); + + fields.emplace_back(MappedField{ + .names = {"items"}, .field_id = 2, .nested_mapping = std::move(element_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for map types + std::unique_ptr<NameMapping> CreateMapNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + + // Nested mapping for map key-value + std::vector<MappedField> map_fields; + map_fields.emplace_back(MappedField{.names = {"key"}, .field_id = 30}); + map_fields.emplace_back(MappedField{.names = {"value"}, .field_id = 31}); + auto map_mapping = MappedFields::Make(std::move(map_fields)); + + fields.emplace_back(MappedField{.names = {"properties"}, + .field_id = 2, + .nested_mapping = std::move(map_mapping)}); + + return NameMapping::Make(std::move(fields)); + } + + // Helper function to create a name mapping for union types + std::unique_ptr<NameMapping> CreateUnionNameMapping() { + std::vector<MappedField> fields; + fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1}); + fields.emplace_back(MappedField{.names = {"data"}, .field_id = 2}); + return NameMapping::Make(std::move(fields)); + } +}; + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToRecord) { + // Create a simple Avro record schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "name", "type": "string"}, + {"name": "age", "type": "int"} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateSimpleNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& node = *result; + EXPECT_EQ(node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(node->names(), 3); + EXPECT_EQ(node->leaves(), 3); + + // Check that field IDs are properly applied + ASSERT_EQ(node->customAttributes(), 3); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/2, /*field_id=*/3)); +} + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToNestedRecord) { + // Create a nested Avro record schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "address", "type": { + "type": "record", + "name": "address", + "fields": [ + {"name": "street", "type": "string"}, + {"name": "city", "type": "string"}, + {"name": "zip", "type": "string"} + ] + }} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateNestedNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& node = *result; + EXPECT_EQ(node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(node->names(), 2); + EXPECT_EQ(node->leaves(), 2); + + // Check that field IDs are properly applied to top-level fields + ASSERT_EQ(node->customAttributes(), 2); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(node, /*index=*/1, /*field_id=*/2)); + + // Check nested record + const auto& address_node = node->leafAt(1); + EXPECT_EQ(address_node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(address_node->names(), 3); + EXPECT_EQ(address_node->leaves(), 3); + + // Check that field IDs are properly applied to nested fields + ASSERT_EQ(address_node->customAttributes(), 3); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/0, /*field_id=*/10)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/1, /*field_id=*/11)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(address_node, /*index=*/2, /*field_id=*/12)); +} + +TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToArray) { + // Create an Avro array schema without field IDs + std::string avro_schema_json = R"({ + "type": "record", + "name": "test_record", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "items", "type": { + "type": "array", + "items": "string" + }} + ] + })"; + auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json); + + auto name_mapping = CreateArrayNameMapping(); + + auto result = CreateAvroNodeWithFieldIds(avro_schema.root(), *name_mapping); + ASSERT_THAT(result, IsOk()); + + const auto& new_node = *result; + EXPECT_EQ(new_node->type(), ::avro::AVRO_RECORD); + EXPECT_EQ(new_node->names(), 2); + EXPECT_EQ(new_node->leaves(), 2); + + // Check that field IDs are properly applied to top-level fields + ASSERT_EQ(new_node->customAttributes(), 2); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/0, /*field_id=*/1)); + ASSERT_NO_FATAL_FAILURE(CheckFieldIdAt(new_node, /*index=*/1, /*field_id=*/2)); + + // Check array field structure and element field ID + const auto& array_node = new_node->leafAt(1); + EXPECT_EQ(array_node->type(), ::avro::AVRO_ARRAY); + EXPECT_EQ(array_node->leaves(), 1); + + // Check that array element has field ID applied + const auto& element_node = array_node->leafAt(0); + EXPECT_EQ(element_node->type(), ::avro::AVRO_STRING); Review Comment: No, it should succeed. Otherwise this PR is incomplete because child fields of map and list types do not have correct field ids. I think I have left some comments like https://github.com/apache/iceberg-cpp/pull/127/files#r2203809525 to address this but they are still unresolved. That's why I have asked for completely passing test cases again and again during the review. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org