This is an automated email from the ASF dual-hosted git repository.
thiru pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new ab148b2e5 [AVRO-4081][C++] Add big decimal support and update
documentation (#3148)
ab148b2e5 is described below
commit ab148b2e5ed8eac00100f1f328f48bec7f703873
Author: glywk <[email protected]>
AuthorDate: Tue Dec 24 05:30:13 2024 +0100
[AVRO-4081][C++] Add big decimal support and update documentation (#3148)
---
.../en/docs/++version++/Specification/_index.md | 8 +++++---
lang/c++/impl/Compiler.cc | 6 +++++-
lang/c++/impl/LogicalType.cc | 3 +++
lang/c++/impl/Node.cc | 7 +++++++
lang/c++/include/avro/LogicalType.hh | 1 +
lang/c++/test/SchemaTests.cc | 22 +++++++++++++++++++---
6 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/doc/content/en/docs/++version++/Specification/_index.md
b/doc/content/en/docs/++version++/Specification/_index.md
index 53b815c10..950bae117 100755
--- a/doc/content/en/docs/++version++/Specification/_index.md
+++ b/doc/content/en/docs/++version++/Specification/_index.md
@@ -787,6 +787,8 @@ A logical type is always serialized using its underlying
Avro type so that value
Language implementations must ignore unknown logical types when reading, and
should use the underlying Avro type. If a logical type is invalid, for example
a decimal with scale greater than its precision, then implementations should
ignore the logical type and use the underlying Avro type.
### Decimal
+
+#### Fixed precision
The `decimal` logical type represents an arbitrary-precision signed decimal
number of the form _unscaled × 10<sup>-scale</sup>_.
A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte
array must contain the two's-complement representation of the unscaled integer
value in big-endian byte order. The scale is fixed, and is specified using an
attribute.
@@ -810,11 +812,11 @@ Scale must be zero or a positive integer less than or
equal to the precision.
For the purposes of schema resolution, two schemas that are `decimal` logical
types _match_ if their scales and precisions match.
-**alternative**
+#### Scalable precision
As it's not always possible to fix scale and precision in advance for a
decimal field, `big-decimal` is another `decimal` logical type restrict to Avro
_bytes_.
-_Currently only available in Java and Rust_.
+_Currently only available in C++, Java and Rust_.
```json
{
@@ -822,7 +824,7 @@ _Currently only available in Java and Rust_.
"logicalType": "big-decimal"
}
```
-Here, as scale property is stored in value itself it needs more bytes than
preceding `decimal` type, but it allows more flexibility.
+Here, the bytes array contains two serialized properties. The first part is an
Avro byte array holding the two's-complement representation of the unscaled
integer value in big-endian byte order. The second part is the scale property,
stored as an Avro integer. Scale must be zero or a positive integer less than
or equal to the precision. The value itself needs more bytes than the preceding
`decimal` type, but it allows more flexibility.
### UUID
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index f1e2dfd96..73aaa9bbb 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -359,7 +359,11 @@ static LogicalType makeLogicalType(const Entity &e, const
Object &m) {
}
LogicalType::Type t = LogicalType::NONE;
- if (typeField == "date")
+ if (typeField == "big-decimal"
+ && !containsField(m, "precision")
+ && !containsField(m, "scale"))
+ t = LogicalType::BIG_DECIMAL;
+ else if (typeField == "date")
t = LogicalType::DATE;
else if (typeField == "time-millis")
t = LogicalType::TIME_MILLIS;
diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc
index ed6a12f08..18da72a23 100644
--- a/lang/c++/impl/LogicalType.cc
+++ b/lang/c++/impl/LogicalType.cc
@@ -51,6 +51,9 @@ void LogicalType::setScale(int32_t scale) {
void LogicalType::printJson(std::ostream &os) const {
switch (type_) {
case LogicalType::NONE: break;
+ case LogicalType::BIG_DECIMAL:
+ os << R"("logicalType": "big-decimal")";
+ break;
case LogicalType::DECIMAL:
os << R"("logicalType": "decimal")";
os << ", \"precision\": " << precision_;
diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc
index fd9ee9d6d..615727128 100644
--- a/lang/c++/impl/Node.cc
+++ b/lang/c++/impl/Node.cc
@@ -139,6 +139,13 @@ void Node::setLogicalType(LogicalType logicalType) {
// Check that the logical type is applicable to the node type.
switch (logicalType.type()) {
case LogicalType::NONE: break;
+ case LogicalType::BIG_DECIMAL: {
+ if (type_ != AVRO_BYTES) {
+ throw Exception("BIG_DECIMAL logical type can annotate "
+ "only BYTES type");
+ }
+ break;
+ }
case LogicalType::DECIMAL: {
if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) {
throw Exception("DECIMAL logical type can annotate "
diff --git a/lang/c++/include/avro/LogicalType.hh
b/lang/c++/include/avro/LogicalType.hh
index b2a7d0294..5b274bcb7 100644
--- a/lang/c++/include/avro/LogicalType.hh
+++ b/lang/c++/include/avro/LogicalType.hh
@@ -29,6 +29,7 @@ class AVRO_DECL LogicalType {
public:
enum Type {
NONE,
+ BIG_DECIMAL,
DECIMAL,
DATE,
TIME_MILLIS,
diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 477e36046..bda02ad4c 100644
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -314,6 +314,7 @@ const char *roundTripSchemas[] = {
R"({"type":"fixed","name":"Test","size":1})",
// Logical types
+ R"({"type":"bytes","logicalType":"big-decimal"})",
R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})",
R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})",
R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})",
@@ -361,6 +362,7 @@ const char *roundTripSchemas[] = {
const char *malformedLogicalTypes[] = {
// Wrong base type.
+ R"({"type":"long","logicalType": "big-decimal"})",
R"({"type":"long","logicalType": "decimal","precision": 10})",
R"({"type":"string","logicalType":"date"})",
R"({"type":"string","logicalType":"time-millis"})",
@@ -379,9 +381,12 @@ const char *malformedLogicalTypes[] = {
R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
// Scale is larger than precision.
- R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"
-};
-
+ R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})",
+ // Precision is not supported by the big-decimal logical type
+ // and scale is integrated in bytes.
+ R"({"type":"bytes","logicalType": "big-decimal","precision": 9})",
+ R"({"type":"bytes","logicalType": "big-decimal","scale": 2})",
+ R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale":
2})"};
const char *schemasToCompact[] = {
// Schema without any whitespace
R"({"type":"record","name":"Test","fields":[]})",
@@ -469,6 +474,10 @@ static void testCompactSchemas() {
}
static void testLogicalTypes() {
+ const char *bytesBigDecimalType = R"({
+ "type": "bytes",
+ "logicalType": "big-decimal"
+ })";
const char *bytesDecimalType = R"({
"type": "bytes",
"logicalType": "decimal",
@@ -496,6 +505,13 @@ static void testLogicalTypes() {
const char *uuidType = R"({"type": "string","logicalType": "uuid"})";
// AVRO-2923 Union with LogicalType
const char *unionType = R"([{"type":"string",
"logicalType":"uuid"},"null"]})";
+ {
+ BOOST_TEST_CHECKPOINT(bytesBigDecimalType);
+ ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType);
+ BOOST_CHECK(schema.root()->type() == AVRO_BYTES);
+ LogicalType logicalType = schema.root()->logicalType();
+ BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL);
+ }
{
BOOST_TEST_CHECKPOINT(bytesDecimalType);
ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);