This is an automated email from the ASF dual-hosted git repository.

thiru pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new ab148b2e5 [AVRO-4081][C++] Add big decimal support and update 
documentation (#3148)
ab148b2e5 is described below

commit ab148b2e5ed8eac00100f1f328f48bec7f703873
Author: glywk <[email protected]>
AuthorDate: Tue Dec 24 05:30:13 2024 +0100

    [AVRO-4081][C++] Add big decimal support and update documentation (#3148)
---
 .../en/docs/++version++/Specification/_index.md    |  8 +++++---
 lang/c++/impl/Compiler.cc                          |  6 +++++-
 lang/c++/impl/LogicalType.cc                       |  3 +++
 lang/c++/impl/Node.cc                              |  7 +++++++
 lang/c++/include/avro/LogicalType.hh               |  1 +
 lang/c++/test/SchemaTests.cc                       | 22 +++++++++++++++++++---
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/doc/content/en/docs/++version++/Specification/_index.md 
b/doc/content/en/docs/++version++/Specification/_index.md
index 53b815c10..950bae117 100755
--- a/doc/content/en/docs/++version++/Specification/_index.md
+++ b/doc/content/en/docs/++version++/Specification/_index.md
@@ -787,6 +787,8 @@ A logical type is always serialized using its underlying 
Avro type so that value
 Language implementations must ignore unknown logical types when reading, and 
should use the underlying Avro type. If a logical type is invalid, for example 
a decimal with scale greater than its precision, then implementations should 
ignore the logical type and use the underlying Avro type.
 
 ### Decimal
+
+#### Fixed precision
 The `decimal` logical type represents an arbitrary-precision signed decimal 
number of the form _unscaled × 10<sup>-scale</sup>_.
 
 A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte 
array must contain the two's-complement representation of the unscaled integer 
value in big-endian byte order. The scale is fixed, and is specified using an 
attribute.
@@ -810,11 +812,11 @@ Scale must be zero or a positive integer less than or 
equal to the precision.
 
 For the purposes of schema resolution, two schemas that are `decimal` logical 
types _match_ if their scales and precisions match.
 
-**alternative**
+#### Scalable precision
 
 As it's not always possible to fix scale and precision in advance for a 
decimal field, `big-decimal` is another `decimal` logical type restricted to 
Avro _bytes_.
 
-_Currently only available in Java and Rust_.
+_Currently only available in C++, Java and Rust_.
 
 ```json
 {
@@ -822,7 +824,7 @@ _Currently only available in Java and Rust_.
   "logicalType": "big-decimal"
 }
 ```
-Here, as scale property is stored in value itself it needs more bytes than 
preceding `decimal` type, but it allows more flexibility.
+Here, the bytes array contains two serialized properties. The first part is an 
Avro byte array holding the two's-complement representation of the unscaled 
integer value in big-endian byte order. The second part is the scale property, 
stored as an Avro integer. Scale must be zero or a positive integer less than 
or equal to the precision. The value itself needs more bytes than the preceding 
`decimal` type, but it allows more flexibility.
 
 ### UUID
 
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index f1e2dfd96..73aaa9bbb 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -359,7 +359,11 @@ static LogicalType makeLogicalType(const Entity &e, const 
Object &m) {
     }
 
     LogicalType::Type t = LogicalType::NONE;
-    if (typeField == "date")
+    if (typeField == "big-decimal"
+        && !containsField(m, "precision")
+        && !containsField(m, "scale"))
+        t = LogicalType::BIG_DECIMAL;
+    else if (typeField == "date")
         t = LogicalType::DATE;
     else if (typeField == "time-millis")
         t = LogicalType::TIME_MILLIS;
diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc
index ed6a12f08..18da72a23 100644
--- a/lang/c++/impl/LogicalType.cc
+++ b/lang/c++/impl/LogicalType.cc
@@ -51,6 +51,9 @@ void LogicalType::setScale(int32_t scale) {
 void LogicalType::printJson(std::ostream &os) const {
     switch (type_) {
         case LogicalType::NONE: break;
+        case LogicalType::BIG_DECIMAL:
+            os << R"("logicalType": "big-decimal")";
+            break;
         case LogicalType::DECIMAL:
             os << R"("logicalType": "decimal")";
             os << ", \"precision\": " << precision_;
diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc
index fd9ee9d6d..615727128 100644
--- a/lang/c++/impl/Node.cc
+++ b/lang/c++/impl/Node.cc
@@ -139,6 +139,13 @@ void Node::setLogicalType(LogicalType logicalType) {
     // Check that the logical type is applicable to the node type.
     switch (logicalType.type()) {
         case LogicalType::NONE: break;
+        case LogicalType::BIG_DECIMAL: {
+            if (type_ != AVRO_BYTES) {
+                throw Exception("BIG_DECIMAL logical type can annotate "
+                                "only BYTES type");
+            }
+            break;
+        }
         case LogicalType::DECIMAL: {
             if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) {
                 throw Exception("DECIMAL logical type can annotate "
diff --git a/lang/c++/include/avro/LogicalType.hh 
b/lang/c++/include/avro/LogicalType.hh
index b2a7d0294..5b274bcb7 100644
--- a/lang/c++/include/avro/LogicalType.hh
+++ b/lang/c++/include/avro/LogicalType.hh
@@ -29,6 +29,7 @@ class AVRO_DECL LogicalType {
 public:
     enum Type {
         NONE,
+        BIG_DECIMAL,
         DECIMAL,
         DATE,
         TIME_MILLIS,
diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 477e36046..bda02ad4c 100644
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -314,6 +314,7 @@ const char *roundTripSchemas[] = {
     R"({"type":"fixed","name":"Test","size":1})",
 
     // Logical types
+    R"({"type":"bytes","logicalType":"big-decimal"})",
     R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})",
     
R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})",
     
R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})",
@@ -361,6 +362,7 @@ const char *roundTripSchemas[] = {
 
 const char *malformedLogicalTypes[] = {
     // Wrong base type.
+    R"({"type":"long","logicalType": "big-decimal"})",
     R"({"type":"long","logicalType": "decimal","precision": 10})",
     R"({"type":"string","logicalType":"date"})",
     R"({"type":"string","logicalType":"time-millis"})",
@@ -379,9 +381,12 @@ const char *malformedLogicalTypes[] = {
     
R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
     
R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
     // Scale is larger than precision.
-    R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"
-};
-
+    R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})",
+    // Precision is not supported by the big-decimal logical type
+    // and scale is integrated in bytes.
+    R"({"type":"bytes","logicalType": "big-decimal","precision": 9})",
+    R"({"type":"bytes","logicalType": "big-decimal","scale": 2})",
+    R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale": 
2})"};
 const char *schemasToCompact[] = {
     // Schema without any whitespace
     R"({"type":"record","name":"Test","fields":[]})",
@@ -469,6 +474,10 @@ static void testCompactSchemas() {
 }
 
 static void testLogicalTypes() {
+    const char *bytesBigDecimalType = R"({
+        "type": "bytes",
+        "logicalType": "big-decimal"
+    })";
     const char *bytesDecimalType = R"({
         "type": "bytes",
         "logicalType": "decimal",
@@ -496,6 +505,13 @@ static void testLogicalTypes() {
     const char *uuidType = R"({"type": "string","logicalType": "uuid"})";
     // AVRO-2923 Union with LogicalType
     const char *unionType = R"([{"type":"string", 
"logicalType":"uuid"},"null"]})";
+    {
+        BOOST_TEST_CHECKPOINT(bytesBigDecimalType);
+        ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType);
+        BOOST_CHECK(schema.root()->type() == AVRO_BYTES);
+        LogicalType logicalType = schema.root()->logicalType();
+        BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL);
+    }
     {
         BOOST_TEST_CHECKPOINT(bytesDecimalType);
         ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);

Reply via email to