This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 86fb29c77 AVRO-4058: [C++] Improve readability of SchemaTests (#3167)
86fb29c77 is described below
commit 86fb29c7721b23f0e11a4f5d9592e1db41793f7b
Author: Pascal Ginter <[email protected]>
AuthorDate: Tue Sep 24 08:30:01 2024 +0200
AVRO-4058: [C++] Improve readability of SchemaTests (#3167)
* Converted majority of escaped strings into raw string literals
* Fixed undefined behavior in whitespace trimming
---------
Co-authored-by: Pascal Ginter <[email protected]~>
---
lang/c++/test/SchemaTests.cc | 401 +++++++++++++++++++++++++++----------------
1 file changed, 253 insertions(+), 148 deletions(-)
diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 029be79d5..6f9c93d6d 100644
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -20,6 +20,7 @@
#include "GenericDatum.hh"
#include "ValidSchema.hh"
+#include <boost/algorithm/string/replace.hpp>
#include <boost/test/included/unit_test.hpp>
#include <boost/test/parameterized_test.hpp>
#include <boost/test/unit_test.hpp>
@@ -28,14 +29,14 @@ namespace avro {
namespace schema {
const char *basicSchemas[] = {
- "\"null\"",
- "\"boolean\"",
- "\"int\"",
- "\"long\"",
- "\"float\"",
- "\"double\"",
- "\"bytes\"",
- "\"string\"",
+ R"("null")",
+ R"("boolean")",
+ R"("int")",
+ R"("long")",
+ R"("float")",
+ R"("double")",
+ R"("bytes")",
+ R"("string")",
// Primitive types - longer
R"({ "type": "null" })",
@@ -48,50 +49,100 @@ const char *basicSchemas[] = {
R"({ "type": "string" })",
// Record
- R"({"type":"record","name":"Test","doc":"Doc_string","fields":[]})",
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f\",\"type\":\"long\"}]}",
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f1\",\"type\":\"long\",\"doc\":\"field_doc\"},"
- "{\"name\":\"f2\",\"type\":\"int\"}]}",
- "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f1\",\"type\":\"long\"},"
- "{\"name\":\"f2\",\"type\":\"int\"}]}",
-
+ R"({
+ "type":"record",
+ "name":"Test",
+ "doc":"Doc_string",
+ "fields":[]
+ })",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields": [
+ {"name":"f","type":"long"}
+ ]
+ })",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields":[
+ {"name":"f1","type":"long","doc":"field_doc"},
+ {"name":"f2","type":"int"}
+ ]
+ })",
+ R"({
+ "type":"error",
+ "name":"Test",
+ "fields":[
+ {"name":"f1","type":"long"},
+ {"name":"f2","type":"int"}
+ ]
+ })",
// Recursive.
- "{\"type\":\"record\",\"name\":\"LongList\","
- "\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"doc\":\"recursive_doc\"},"
- "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}",
+ R"({
+ "type":"record",
+ "name":"LongList",
+ "fields":[
+ {"name":"value","type":"long","doc":"recursive_doc"},
+ {"name":"next","type":["LongList","null"]}
+ ]
+ })",
+
// Enum
- R"({"type":"enum","doc":"enum_doc","name":"Test","symbols":["A","B"]})",
+ R"({
+ "type":"enum",
+ "doc":"enum_doc",
+ "name":"Test",
+ "symbols":["A","B"]
+ })",
// Array
- R"({"type":"array","doc":"array_doc","items":"long"})",
- "{\"type\":\"array\",\"items\":{\"type\":\"enum\","
- "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+ R"({
+ "type":"array",
+ "doc":"array_doc",
+ "items":"long"
+ })",
+ R"({
+ "type":"array",
+ "items":{
+ "type":"enum",
+ "name":"Test",
+ "symbols":["A","B"]
+ }
+ })",
// Map
R"({"type":"map","doc":"map_doc","values":"long"})",
- "{\"type\":\"map\",\"values\":{\"type\":\"enum\", "
- "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+ R"({
+ "type":"map",
+ "values":{
+ "type":"enum",
+ "name":"Test",
+ "symbols":["A","B"]
+ }
+ })",
// Union
R"(["string","null","long"])",
// Fixed
R"({"type":"fixed","doc":"fixed_doc","name":"Test","size":1})",
- "{\"type\":\"fixed\",\"name\":\"MyFixed\","
- "\"namespace\":\"org.apache.hadoop.avro\",\"size\":1}",
+ R"({"type":"fixed","name":"MyFixed","namespace":"org.apache.hadoop.avro","size":1})",
R"({"type":"fixed","name":"Test","size":1})",
R"({"type":"fixed","name":"Test","size":1})",
// Extra attributes (should be ignored)
R"({"type": "null", "extra attribute": "should be ignored"})",
R"({"type": "boolean", "extra1": 1, "extra2": 2, "extra3": 3})",
- "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
- "[{\"name\": \"f\",\"type\": \"long\"}], \"extra attribute\": 1}",
- "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"],"
- "\"extra attribute\": 1}",
+ R"({
+ "type": "record",
+ "name": "Test",
+ "fields":[
+ {"name": "f","type":"long"}
+ ],
+ "extra attribute": 1
+ })",
+ R"({"type": "enum", "name": "Test", "symbols": ["A", "B"],"extra attribute": 1})",
R"({"type": "array", "items": "long", "extra attribute": 1})",
R"({"type": "map", "values": "long", "extra attribute": 1})",
R"({"type": "fixed", "name": "Test", "size": 1, "extra attribute": 1})",
@@ -103,15 +154,31 @@ const char *basicSchemas[] = {
R"({ "name":"test", "type": "record", "fields": [ {"name": "double","type": "double","default" : 1.2 }]})",
// namespace with '$' in it.
- "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"a.b$\",\"fields\":"
- "[{\"name\":\"f\",\"type\":\"long\"}]}",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "namespace":"a.b$",
+ "fields":[
+ {"name":"f","type":"long"}
+ ]
+ })",
// Custom attribute(s) for field in record
- "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
- "[{\"name\": \"f1\",\"type\": \"long\",\"extra field\": \"1\"}]}",
- "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
- "[{\"name\": \"f1\",\"type\": \"long\","
- "\"extra field1\": \"1\",\"extra field2\": \"2\"}]}"};
+ R"({
+ "type": "record",
+ "name": "Test",
+ "fields":[
+ {"name": "f1","type": "long","extra field": "1"}
+ ]
+ })",
+ R"({
+ "type": "record",
+ "name": "Test",
+ "fields":[
+ {"name": "f1","type": "long","extra field1": "1","extra field2": "2"}
+ ]
+ })"
+};
const char *basicSchemaErrors[] = {
// Record
@@ -121,30 +188,33 @@ const char *basicSchemaErrors[] = {
R"({"type":"record","name":"LongList", "fields": "hi"})",
// Undefined name
- "{\"type\":\"record\",\"name\":\"LongList\","
- "\"fields\":[{\"name\":\"value\",\"type\":\"long\"},"
- "{\"name\":\"next\",\"type\":[\"LongListA\",\"null\"]}]}",
+ R"({
+ "type":"record",
+ "name":"LongList",
+ "fields":[
+ {"name":"value","type":"long"},
+ {"name":"next","type":["LongListA","null"]}
+ ]
+ })",
// Enum
// Symbols not an array
- "{\"type\": \"enum\", \"name\": \"Status\", \"symbols\": "
- "\"Normal Caution Critical\"}",
+ R"({"type": "enum", "name": "Status", "symbols":"Normal Caution Critical"})",
// Name not a string
- "{\"type\": \"enum\", \"name\": [ 0, 1, 1, 2, 3, 5, 8 ], "
- "\"symbols\": [\"Golden\", \"Mean\"]}",
+ R"({"type": "enum", "name": [ 0, 1, 1, 2, 3, 5, 8 ], "symbols": ["Golden", "Mean"]})",
// No name
- "{\"type\": \"enum\", \"symbols\" : [\"I\", \"will\", "
- "\"fail\", \"no\", \"name\"]}",
+ R"({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]})",
// Duplicate symbol
- "{\"type\": \"enum\", \"name\": \"Test\","
- "\"symbols\" : [\"AA\", \"AA\"]}",
+ R"({"type": "enum", "name": "Test", "symbols" : ["AA", "AA"]})",
// Union
// Duplicate type
R"(["string", "long", "long"])",
// Duplicate type
- "[{\"type\": \"array\", \"items\": \"long\"}, "
- "{\"type\": \"array\", \"items\": \"string\"}]",
+ R"([
+ {"type": "array", "items": "long"},
+ {"type": "array", "items": "string"}
+ ])",
// Fixed
// No size
@@ -161,50 +231,85 @@ const char *basicSchemaErrors[] = {
};
const char *roundTripSchemas[] = {
- "\"null\"",
- "\"boolean\"",
- "\"int\"",
- "\"long\"",
- "\"float\"",
- "\"double\"",
- "\"bytes\"",
- "\"string\"",
+ R"("null")",
+ R"("boolean")",
+ R"("int")",
+ R"("long")",
+ R"("float")",
+ R"("double")",
+ R"("bytes")",
+ R"("string")",
+
// Record
R"({"type":"record","name":"Test","fields":[]})",
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f\",\"type\":\"long\"}]}",
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f1\",\"type\":\"long\"},"
- "{\"name\":\"f2\",\"type\":\"int\"}]}",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields":[
+ {"name":"f","type":"long"}
+ ]
+ })",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields":[
+ {"name":"f1","type":"long"},
+ {"name":"f2","type":"int"}
+ ]
+ })",
+
/* Avro-C++ cannot do a round-trip on error schemas.
- * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
- * "[{\"name\":\"f1\",\"type\":\"long\"},"
- * "{\"name\":\"f2\",\"type\":\"int\"}]}"
- */
+ * R"({
+ * "type":"error",
+ * "name":"Test",
+ * "fields":[
+ * {"name":"f1","type":"long"},
+ * {"name":"f2","type":"int"}
+ * ]
+ * })",
+ */
+
// Recursive.
- "{\"type\":\"record\",\"name\":\"LongList\","
- "\"fields\":[{\"name\":\"value\",\"type\":\"long\"},"
- "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}",
+ R"({
+ "type":"record",
+ "name":"LongList",
+ "fields":[
+ {"name":"value","type":"long"},
+ {"name":"next","type":["LongList","null"]}
+ ]
+ })",
+
// Enum
R"({"type":"enum","name":"Test","symbols":["A","B"]})",
// Array
R"({"type":"array","items":"long"})",
- "{\"type\":\"array\",\"items\":{\"type\":\"enum\","
- "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+ R"({
+ "type":"array",
+ "items":{
+ "type":"enum",
+ "name":"Test",
+ "symbols":["A","B"]
+ }
+ })",
// Map
R"({"type":"map","values":"long"})",
- "{\"type\":\"map\",\"values\":{\"type\":\"enum\","
- "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
+ R"({
+ "type":"map",
+ "values":{
+ "type":"enum",
+ "name":"Test",
+ "symbols":["A","B"]
+ }
+ })",
// Union
R"(["string","null","long"])",
// Fixed
R"({"type":"fixed","name":"Test","size":1})",
- "{\"type\":\"fixed\",\"namespace\":\"org.apache.hadoop.avro\","
- "\"name\":\"MyFixed\",\"size\":1}",
+ R"({"type":"fixed","namespace":"org.apache.hadoop.avro","name":"MyFixed","size":1})",
R"({"type":"fixed","name":"Test","size":1})",
R"({"type":"fixed","name":"Test","size":1})",
@@ -225,17 +330,32 @@ const char *roundTripSchemas[] = {
R"({"type":"string","logicalType":"uuid"})",
// namespace with '$' in it.
- "{\"type\":\"record\",\"namespace\":\"a.b$\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f\",\"type\":\"long\"}]}",
+ R"({
+ "type":"record",
+ "namespace":"a.b$",
+ "name":"Test",
+ "fields":[
+ {"name":"f","type":"long"}
+ ]
+ })",
// Custom fields
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
- "{\"name\":\"f2\",\"type\":\"int\"}]}",
- "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
- "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
- "{\"name\":\"f2\",\"type\":\"int\","
- "\"extra_field1\":\"21\",\"extra_field2\":\"22\"}]}",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields":[
+ {"name":"f1","type":"long","extra_field":"1"},
+ {"name":"f2","type":"int"}
+ ]
+ })",
+ R"({
+ "type":"record",
+ "name":"Test",
+ "fields":[
+ {"name":"f1","type":"long","extra_field":"1"},
+ {"name":"f2","type":"int","extra_field1":"21","extra_field2":"22"}
+ ]
+ })"
};
const char *malformedLogicalTypes[] = {
@@ -258,7 +378,9 @@ const char *malformedLogicalTypes[] = {
R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
// Scale is larger than precision.
- R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"};
+ R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"
+};
+
const char *schemasToCompact[] = {
// Schema without any whitespace
R"({"type":"record","name":"Test","fields":[]})",
@@ -284,7 +406,18 @@ const char *compactSchemas[] = {
"\"fields\":["
"{\"name\":\"re1\",\"type\":\"long\",\"doc\":\"A \\\"quoted doc\\\"\"},"
"{\"name\":\"re2\",\"type\":\"long\",\"doc\":\"extra slashes\\\\\\\\\"}"
- "]}"};
+ "]}"
+};
+
+static const std::vector<char> whitespaces = {' ', '\f', '\n', '\r', '\t', '\v'};
+
+static std::string removeWhitespaceFromSchema(const std::string& schema){
+ std::string trimmedSchema = schema;
+ for (char toReplace : whitespaces){
+ boost::algorithm::replace_all(trimmedSchema, std::string{toReplace}, "");
+ }
+ return trimmedSchema;
+}
void testTypes() {
BOOST_CHECK_EQUAL(isAvroType(AVRO_BOOL), true);
@@ -313,13 +446,13 @@ static void testRoundTrip(const char *schema) {
compileJsonSchemaFromString(std::string(schema));
std::ostringstream os;
compiledSchema.toJson(os);
- std::string result = os.str();
- result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end()); // Remove whitespace
- BOOST_CHECK(result == std::string(schema));
+ std::string result = removeWhitespaceFromSchema(os.str());
+ std::string trimmedSchema = removeWhitespaceFromSchema(schema);
+ BOOST_CHECK(result == trimmedSchema);
// Verify that the compact schema from toJson has the same content as the
// schema.
std::string result2 = compiledSchema.toJson(false);
- BOOST_CHECK(result2 == std::string(schema));
+ BOOST_CHECK(result2 == trimmedSchema);
}
static void testCompactSchemas() {
@@ -335,61 +468,33 @@ static void testCompactSchemas() {
}
static void testLogicalTypes() {
- const char *bytesDecimalType = "{\n\
- \"type\": \"bytes\",\n\
- \"logicalType\": \"decimal\",\n\
- \"precision\": 10,\n\
- \"scale\": 2\n\
- }";
- const char *fixedDecimalType = "{\n\
- \"type\": \"fixed\",\n\
- \"size\": 16,\n\
- \"name\": \"fixedDecimalType\",\n\
- \"logicalType\": \"decimal\",\n\
- \"precision\": 12,\n\
- \"scale\": 6\n\
- }";
- const char *dateType = "{\n\
- \"type\": \"int\", \"logicalType\": \"date\"\n\
- }";
- const char *timeMillisType = "{\n\
- \"type\": \"int\", \"logicalType\": \"time-millis\"\n\
- }";
- const char *timeMicrosType = "{\n\
- \"type\": \"long\", \"logicalType\": \"time-micros\"\n\
- }";
- const char *timestampMillisType = "{\n\
- \"type\": \"long\", \"logicalType\": \"timestamp-millis\"\n\
- }";
- const char *timestampMicrosType = "{\n\
- \"type\": \"long\", \"logicalType\": \"timestamp-micros\"\n\
- }";
- const char *timestampNanosType = "{\n\
- \"type\": \"long\", \"logicalType\": \"timestamp-nanos\"\n\
- }";
- const char *localTimestampMillisType = "{\n\
- \"type\": \"long\", \"logicalType\": \"local-timestamp-millis\"\n\
- }";
- const char *localTimestampMicrosType = "{\n\
- \"type\": \"long\", \"logicalType\": \"local-timestamp-micros\"\n\
- }";
- const char *localTimestampNanosType = "{\n\
- \"type\": \"long\", \"logicalType\": \"local-timestamp-nanos\"\n\
- }";
- const char *durationType = "{\n\
- \"type\": \"fixed\",\n\
- \"size\": 12,\n\
- \"name\": \"durationType\",\n\
- \"logicalType\": \"duration\"\n\
- }";
- const char *uuidType = "{\n\
- \"type\": \"string\",\n\
- \"logicalType\": \"uuid\"\n\
- }";
+ const char *bytesDecimalType = R"({
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": 10,
+ "scale": 2
+ })";
+ const char *fixedDecimalType = R"({
+ "type": "fixed",
+ "size": 16,
+ "name": "fixedDecimalType",
+ "logicalType": "decimal",
+ "precision": 12,
+ "scale": 6
+ })";
+ const char *dateType = R"({"type": "int", "logicalType": "date"})";
+ const char *timeMillisType = R"({"type": "int", "logicalType": "time-millis"})";
+ const char *timeMicrosType = R"({"type": "long", "logicalType": "time-micros"})";
+ const char *timestampMillisType = R"({"type": "long", "logicalType": "timestamp-millis"})";
+ const char *timestampMicrosType = R"({"type": "long", "logicalType": "timestamp-micros"})";
+ const char *timestampNanosType = R"({"type": "long", "logicalType": "timestamp-nanos"})";
+ const char *localTimestampMillisType = R"({"type": "long", "logicalType": "local-timestamp-millis"})";
+ const char *localTimestampMicrosType = R"({"type": "long", "logicalType": "local-timestamp-micros"})";
+ const char *localTimestampNanosType = R"({"type": "long", "logicalType": "local-timestamp-nanos"})";
+ const char *durationType = R"({"type": "fixed","size": 12,"name": "durationType","logicalType": "duration"})";
+ const char *uuidType = R"({"type": "string","logicalType": "uuid"})";
// AVRO-2923 Union with LogicalType
- const char *unionType = "[\n\
- {\"type\":\"string\", \"logicalType\":\"uuid\"},\"null\"\n\
- ]";
+ const char *unionType = R"([{"type":"string", "logicalType":"uuid"},"null"]})";
{
BOOST_TEST_CHECKPOINT(bytesDecimalType);
ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);