This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 86fb29c77 AVRO-4058: [C++] Improve readability of SchemaTests  (#3167)
86fb29c77 is described below

commit 86fb29c7721b23f0e11a4f5d9592e1db41793f7b
Author: Pascal Ginter <[email protected]>
AuthorDate: Tue Sep 24 08:30:01 2024 +0200

    AVRO-4058: [C++] Improve readability of SchemaTests  (#3167)
    
    * Converted majority of escaped strings into raw string literals
    
    * Fixed undefined behavior in whitespace trimming
    
    ---------
    
    Co-authored-by: Pascal Ginter <[email protected]~>
---
 lang/c++/test/SchemaTests.cc | 401 +++++++++++++++++++++++++++----------------
 1 file changed, 253 insertions(+), 148 deletions(-)

diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 029be79d5..6f9c93d6d 100644
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -20,6 +20,7 @@
 #include "GenericDatum.hh"
 #include "ValidSchema.hh"
 
+#include <boost/algorithm/string/replace.hpp>
 #include <boost/test/included/unit_test.hpp>
 #include <boost/test/parameterized_test.hpp>
 #include <boost/test/unit_test.hpp>
@@ -28,14 +29,14 @@ namespace avro {
 namespace schema {
 
 const char *basicSchemas[] = {
-    "\"null\"",
-    "\"boolean\"",
-    "\"int\"",
-    "\"long\"",
-    "\"float\"",
-    "\"double\"",
-    "\"bytes\"",
-    "\"string\"",
+    R"("null")",
+    R"("boolean")",
+    R"("int")",
+    R"("long")",
+    R"("float")",
+    R"("double")",
+    R"("bytes")",
+    R"("string")",
 
     // Primitive types - longer
     R"({ "type": "null" })",
@@ -48,50 +49,100 @@ const char *basicSchemas[] = {
     R"({ "type": "string" })",
 
     // Record
-    R"({"type":"record","name":"Test","doc":"Doc_string","fields":[]})",
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f\",\"type\":\"long\"}]}",
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f1\",\"type\":\"long\",\"doc\":\"field_doc\"},"
-    "{\"name\":\"f2\",\"type\":\"int\"}]}",
-    "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f1\",\"type\":\"long\"},"
-    "{\"name\":\"f2\",\"type\":\"int\"}]}",
-
+    R"({
+        "type":"record",
+        "name":"Test",
+        "doc":"Doc_string",
+        "fields":[]
+    })",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields": [
+            {"name":"f","type":"long"}
+        ]
+    })",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields":[
+            {"name":"f1","type":"long","doc":"field_doc"},
+            {"name":"f2","type":"int"}
+        ]
+    })",
+    R"({
+        "type":"error",
+        "name":"Test",
+        "fields":[
+            {"name":"f1","type":"long"},
+            {"name":"f2","type":"int"}
+        ]
+    })",
     // Recursive.
-    "{\"type\":\"record\",\"name\":\"LongList\","
-    "\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"doc\":\"recursive_doc\"},"
-    "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}",
+    R"({
+        "type":"record",
+        "name":"LongList",
+        "fields":[
+            {"name":"value","type":"long","doc":"recursive_doc"},
+            {"name":"next","type":["LongList","null"]}
+        ]
+    })",
+
     // Enum
-    R"({"type":"enum","doc":"enum_doc","name":"Test","symbols":["A","B"]})",
+    R"({
+        "type":"enum",
+        "doc":"enum_doc",
+        "name":"Test",
+        "symbols":["A","B"]
+    })",
 
     // Array
-    R"({"type":"array","doc":"array_doc","items":"long"})",
-    "{\"type\":\"array\",\"items\":{\"type\":\"enum\","
-    "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+    R"({
+        "type":"array",
+        "doc":"array_doc",
+        "items":"long"
+    })",
+    R"({
+        "type":"array",
+        "items":{
+            "type":"enum",
+            "name":"Test",
+            "symbols":["A","B"]
+        }
+    })",
 
     // Map
     R"({"type":"map","doc":"map_doc","values":"long"})",
-    "{\"type\":\"map\",\"values\":{\"type\":\"enum\", "
-    "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+    R"({
+        "type":"map",
+        "values":{
+            "type":"enum",
+            "name":"Test",
+            "symbols":["A","B"]
+        }
+    })",
 
     // Union
     R"(["string","null","long"])",
 
     // Fixed
     R"({"type":"fixed","doc":"fixed_doc","name":"Test","size":1})",
-    "{\"type\":\"fixed\",\"name\":\"MyFixed\","
-    "\"namespace\":\"org.apache.hadoop.avro\",\"size\":1}",
+    R"({"type":"fixed","name":"MyFixed","namespace":"org.apache.hadoop.avro","size":1})",
     R"({"type":"fixed","name":"Test","size":1})",
     R"({"type":"fixed","name":"Test","size":1})",
 
     // Extra attributes (should be ignored)
     R"({"type": "null", "extra attribute": "should be ignored"})",
     R"({"type": "boolean", "extra1": 1, "extra2": 2, "extra3": 3})",
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
-    "[{\"name\": \"f\",\"type\": \"long\"}], \"extra attribute\": 1}",
-    "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"],"
-    "\"extra attribute\": 1}",
+    R"({
+        "type": "record",
+        "name": "Test",
+        "fields":[
+            {"name": "f","type":"long"}
+        ],
+        "extra attribute": 1
+    })",
+    R"({"type": "enum", "name": "Test", "symbols": ["A", "B"],"extra attribute": 1})",
     R"({"type": "array", "items": "long", "extra attribute": 1})",
     R"({"type": "map", "values": "long", "extra attribute": 1})",
     R"({"type": "fixed", "name": "Test", "size": 1, "extra attribute": 1})",
@@ -103,15 +154,31 @@ const char *basicSchemas[] = {
     R"({ "name":"test", "type": "record", "fields": [ {"name": "double","type": "double","default" : 1.2 }]})",
 
     // namespace with '$' in it.
-    "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"a.b$\",\"fields\":"
-    "[{\"name\":\"f\",\"type\":\"long\"}]}",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "namespace":"a.b$",
+        "fields":[
+            {"name":"f","type":"long"}
+        ]
+    })",
 
     // Custom attribute(s) for field in record
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
-    "[{\"name\": \"f1\",\"type\": \"long\",\"extra field\": \"1\"}]}",
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
-    "[{\"name\": \"f1\",\"type\": \"long\","
-    "\"extra field1\": \"1\",\"extra field2\": \"2\"}]}"};
+    R"({
+        "type": "record",
+        "name": "Test",
+        "fields":[
+            {"name": "f1","type": "long","extra field": "1"}
+        ]
+    })",
+    R"({
+        "type": "record",
+        "name": "Test",
+        "fields":[
+            {"name": "f1","type": "long","extra field1": "1","extra field2": "2"}
+        ]
+    })"
+};
 
 const char *basicSchemaErrors[] = {
     // Record
@@ -121,30 +188,33 @@ const char *basicSchemaErrors[] = {
     R"({"type":"record","name":"LongList", "fields": "hi"})",
 
     // Undefined name
-    "{\"type\":\"record\",\"name\":\"LongList\","
-    "\"fields\":[{\"name\":\"value\",\"type\":\"long\"},"
-    "{\"name\":\"next\",\"type\":[\"LongListA\",\"null\"]}]}",
+    R"({
+        "type":"record",
+        "name":"LongList",
+        "fields":[
+            {"name":"value","type":"long"},
+            {"name":"next","type":["LongListA","null"]}
+        ]
+    })",
 
     // Enum
     // Symbols not an array
-    "{\"type\": \"enum\", \"name\": \"Status\", \"symbols\": "
-    "\"Normal Caution Critical\"}",
+    R"({"type": "enum", "name": "Status", "symbols":"Normal Caution Critical"})",
     // Name not a string
-    "{\"type\": \"enum\", \"name\": [ 0, 1, 1, 2, 3, 5, 8 ], "
-    "\"symbols\": [\"Golden\", \"Mean\"]}",
+    R"({"type": "enum", "name": [ 0, 1, 1, 2, 3, 5, 8 ], "symbols": ["Golden", "Mean"]})",
     // No name
-    "{\"type\": \"enum\", \"symbols\" : [\"I\", \"will\", "
-    "\"fail\", \"no\", \"name\"]}",
+    R"({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]})",
     // Duplicate symbol
-    "{\"type\": \"enum\", \"name\": \"Test\","
-    "\"symbols\" : [\"AA\", \"AA\"]}",
+    R"({"type": "enum", "name": "Test", "symbols" : ["AA", "AA"]})",
 
     // Union
     // Duplicate type
     R"(["string", "long", "long"])",
     // Duplicate type
-    "[{\"type\": \"array\", \"items\": \"long\"}, "
-    "{\"type\": \"array\", \"items\": \"string\"}]",
+    R"([
+        {"type": "array", "items": "long"},
+        {"type": "array", "items": "string"}
+    ])",
 
     // Fixed
     // No size
@@ -161,50 +231,85 @@ const char *basicSchemaErrors[] = {
 };
 
 const char *roundTripSchemas[] = {
-    "\"null\"",
-    "\"boolean\"",
-    "\"int\"",
-    "\"long\"",
-    "\"float\"",
-    "\"double\"",
-    "\"bytes\"",
-    "\"string\"",
+    R"("null")",
+    R"("boolean")",
+    R"("int")",
+    R"("long")",
+    R"("float")",
+    R"("double")",
+    R"("bytes")",
+    R"("string")",
+
     // Record
     R"({"type":"record","name":"Test","fields":[]})",
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f\",\"type\":\"long\"}]}",
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f1\",\"type\":\"long\"},"
-    "{\"name\":\"f2\",\"type\":\"int\"}]}",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields":[
+            {"name":"f","type":"long"}
+        ]
+    })",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields":[
+            {"name":"f1","type":"long"},
+            {"name":"f2","type":"int"}
+        ]
+    })",
+
     /* Avro-C++ cannot do a round-trip on error schemas.
- * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
- *       "[{\"name\":\"f1\",\"type\":\"long\"},"
- *       "{\"name\":\"f2\",\"type\":\"int\"}]}"
- */
+     * R"({
+     *      "type":"error",
+     *      "name":"Test",
+     *      "fields":[
+     *          {"name":"f1","type":"long"},
+     *          {"name":"f2","type":"int"}
+     *          ]
+     * })",
+     */
+
     // Recursive.
-    "{\"type\":\"record\",\"name\":\"LongList\","
-    "\"fields\":[{\"name\":\"value\",\"type\":\"long\"},"
-    "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}",
+    R"({
+        "type":"record",
+        "name":"LongList",
+        "fields":[
+            {"name":"value","type":"long"},
+            {"name":"next","type":["LongList","null"]}
+        ]
+    })",
+
     // Enum
     R"({"type":"enum","name":"Test","symbols":["A","B"]})",
 
     // Array
     R"({"type":"array","items":"long"})",
-    "{\"type\":\"array\",\"items\":{\"type\":\"enum\","
-    "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+    R"({
+        "type":"array",
+        "items":{
+            "type":"enum",
+            "name":"Test",
+            "symbols":["A","B"]
+        }
+    })",
 
     // Map
     R"({"type":"map","values":"long"})",
-    "{\"type\":\"map\",\"values\":{\"type\":\"enum\","
-    "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+    R"({
+        "type":"map",
+        "values":{
+            "type":"enum",
+            "name":"Test",
+            "symbols":["A","B"]
+        }
+    })",
 
     // Union
     R"(["string","null","long"])",
 
     // Fixed
     R"({"type":"fixed","name":"Test","size":1})",
-    "{\"type\":\"fixed\",\"namespace\":\"org.apache.hadoop.avro\","
-    "\"name\":\"MyFixed\",\"size\":1}",
+    R"({"type":"fixed","namespace":"org.apache.hadoop.avro","name":"MyFixed","size":1})",
     R"({"type":"fixed","name":"Test","size":1})",
     R"({"type":"fixed","name":"Test","size":1})",
 
@@ -225,17 +330,32 @@ const char *roundTripSchemas[] = {
     R"({"type":"string","logicalType":"uuid"})",
 
     // namespace with '$' in it.
-    "{\"type\":\"record\",\"namespace\":\"a.b$\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f\",\"type\":\"long\"}]}",
+    R"({
+        "type":"record",
+        "namespace":"a.b$",
+        "name":"Test",
+        "fields":[
+            {"name":"f","type":"long"}
+        ]
+    })",
 
     // Custom fields
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
-    "{\"name\":\"f2\",\"type\":\"int\"}]}",
-    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
-    "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
-    "{\"name\":\"f2\",\"type\":\"int\","
-    "\"extra_field1\":\"21\",\"extra_field2\":\"22\"}]}",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields":[
+            {"name":"f1","type":"long","extra_field":"1"},
+            {"name":"f2","type":"int"}
+        ]
+    })",
+    R"({
+        "type":"record",
+        "name":"Test",
+        "fields":[
+            {"name":"f1","type":"long","extra_field":"1"},
+            {"name":"f2","type":"int","extra_field1":"21","extra_field2":"22"}
+        ]
+    })"
 };
 
 const char *malformedLogicalTypes[] = {
@@ -258,7 +378,9 @@ const char *malformedLogicalTypes[] = {
     R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
     R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
     // Scale is larger than precision.
-    R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"};
+    R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"
+};
+
 const char *schemasToCompact[] = {
     // Schema without any whitespace
     R"({"type":"record","name":"Test","fields":[]})",
@@ -284,7 +406,18 @@ const char *compactSchemas[] = {
     "\"fields\":["
     "{\"name\":\"re1\",\"type\":\"long\",\"doc\":\"A \\\"quoted doc\\\"\"},"
     "{\"name\":\"re2\",\"type\":\"long\",\"doc\":\"extra slashes\\\\\\\\\"}"
-    "]}"};
+    "]}"
+};
+
+static const std::vector<char> whitespaces = {' ', '\f', '\n', '\r', '\t', '\v'};
+
+static std::string removeWhitespaceFromSchema(const std::string& schema){
+    std::string trimmedSchema = schema;
+    for (char toReplace : whitespaces){
+        boost::algorithm::replace_all(trimmedSchema, std::string{toReplace}, "");
+    }
+    return trimmedSchema;
+}
 
 void testTypes() {
     BOOST_CHECK_EQUAL(isAvroType(AVRO_BOOL), true);
@@ -313,13 +446,13 @@ static void testRoundTrip(const char *schema) {
         compileJsonSchemaFromString(std::string(schema));
     std::ostringstream os;
     compiledSchema.toJson(os);
-    std::string result = os.str();
-    result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end()); // Remove whitespace
-    BOOST_CHECK(result == std::string(schema));
+    std::string result = removeWhitespaceFromSchema(os.str());
+    std::string trimmedSchema = removeWhitespaceFromSchema(schema);
+    BOOST_CHECK(result == trimmedSchema);
     // Verify that the compact schema from toJson has the same content as the
     // schema.
     std::string result2 = compiledSchema.toJson(false);
-    BOOST_CHECK(result2 == std::string(schema));
+    BOOST_CHECK(result2 == trimmedSchema);
 }
 
 static void testCompactSchemas() {
@@ -335,61 +468,33 @@ static void testCompactSchemas() {
 }
 
 static void testLogicalTypes() {
-    const char *bytesDecimalType = "{\n\
-        \"type\": \"bytes\",\n\
-        \"logicalType\": \"decimal\",\n\
-        \"precision\": 10,\n\
-        \"scale\": 2\n\
-    }";
-    const char *fixedDecimalType = "{\n\
-        \"type\": \"fixed\",\n\
-        \"size\": 16,\n\
-        \"name\": \"fixedDecimalType\",\n\
-        \"logicalType\": \"decimal\",\n\
-        \"precision\": 12,\n\
-        \"scale\": 6\n\
-    }";
-    const char *dateType = "{\n\
-        \"type\": \"int\", \"logicalType\": \"date\"\n\
-    }";
-    const char *timeMillisType = "{\n\
-        \"type\": \"int\", \"logicalType\": \"time-millis\"\n\
-    }";
-    const char *timeMicrosType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"time-micros\"\n\
-    }";
-    const char *timestampMillisType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"timestamp-millis\"\n\
-    }";
-    const char *timestampMicrosType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"timestamp-micros\"\n\
-    }";
-    const char *timestampNanosType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"timestamp-nanos\"\n\
-    }";
-    const char *localTimestampMillisType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"local-timestamp-millis\"\n\
-    }";
-    const char *localTimestampMicrosType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"local-timestamp-micros\"\n\
-    }";
-    const char *localTimestampNanosType = "{\n\
-        \"type\": \"long\", \"logicalType\": \"local-timestamp-nanos\"\n\
-    }";
-    const char *durationType = "{\n\
-        \"type\": \"fixed\",\n\
-        \"size\": 12,\n\
-        \"name\": \"durationType\",\n\
-        \"logicalType\": \"duration\"\n\
-    }";
-    const char *uuidType = "{\n\
-        \"type\": \"string\",\n\
-        \"logicalType\": \"uuid\"\n\
-    }";
+    const char *bytesDecimalType = R"({
+        "type": "bytes",
+        "logicalType": "decimal",
+        "precision": 10,
+        "scale": 2
+    })";
+    const char *fixedDecimalType = R"({
+        "type": "fixed",
+        "size": 16,
+        "name": "fixedDecimalType",
+        "logicalType": "decimal",
+        "precision": 12,
+        "scale": 6
+    })";
+    const char *dateType = R"({"type": "int", "logicalType": "date"})";
+    const char *timeMillisType = R"({"type": "int", "logicalType": "time-millis"})";
+    const char *timeMicrosType = R"({"type": "long", "logicalType": "time-micros"})";
+    const char *timestampMillisType = R"({"type": "long", "logicalType": "timestamp-millis"})";
+    const char *timestampMicrosType = R"({"type": "long", "logicalType": "timestamp-micros"})";
+    const char *timestampNanosType = R"({"type": "long", "logicalType": "timestamp-nanos"})";
+    const char *localTimestampMillisType = R"({"type": "long", "logicalType": "local-timestamp-millis"})";
+    const char *localTimestampMicrosType = R"({"type": "long", "logicalType": "local-timestamp-micros"})";
+    const char *localTimestampNanosType = R"({"type": "long", "logicalType": "local-timestamp-nanos"})";
+    const char *durationType = R"({"type": "fixed","size": 12,"name": "durationType","logicalType": "duration"})";
+    const char *uuidType = R"({"type": "string","logicalType": "uuid"})";
     // AVRO-2923 Union with LogicalType
-    const char *unionType = "[\n\
-        {\"type\":\"string\", \"logicalType\":\"uuid\"},\"null\"\n\
-    ]";
+    const char *unionType = R"([{"type":"string", "logicalType":"uuid"},"null"]})";
     {
         BOOST_TEST_CHECKPOINT(bytesDecimalType);
         ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);

Reply via email to