This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new a440eb67d7 AVRO 4175: [C++] Allow previously parsed schemas to be
referenced when parsing a schema (#3475)
a440eb67d7 is described below
commit a440eb67d781322d08544c2096a1b0877aa5e027
Author: Robert Yokota <[email protected]>
AuthorDate: Mon Sep 22 10:34:54 2025 -0700
AVRO 4175: [C++] Allow previously parsed schemas to be referenced when
parsing a schema (#3475)
---
lang/c++/impl/Compiler.cc | 29 +++++++++++++++++-
lang/c++/include/avro/Compiler.hh | 5 ++++
lang/c++/test/CompilerTests.cc | 63 +++++++++++++++++++++++++++++++++++++++
3 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index 5f59fb5bac..d0ac20d020 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -94,7 +94,15 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
auto it = st.find(n);
if (it != st.end()) {
- return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second));
+ // Return the raw NodePtr instead of creating a new "NodeSymbolic"
+ // via "NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second))"
+ // in order to support externally resolved named references.
+ // This is safe because the validator canonicalizes duplicates:
+ // when it sees the same named node again (including self-recursion),
+ // it replaces that leaf with a NodeSymbolic via "setLeafToSymbolic".
+ // So even if the raw NodePtr is returned initially, validation
+ // converts repeats to symbolic links.
+ return it->second;
}
throw Exception("Unknown type: {}", n);
}
@@ -638,4 +646,23 @@ AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string &
}
}
+AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
+ const std::map<Name, ValidSchema> &namedReferences) {
+ if (!is.good()) {
+ throw Exception("Input stream is not good");
+ }
+
+ std::unique_ptr<InputStream> in = istreamInputStream(is);
+ json::Entity e = json::loadEntity(*in);
+
+ // Convert the map<Name, ValidSchema> to SymbolTable (map<Name, NodePtr>)
+ SymbolTable st;
+ for (const auto &entry : namedReferences) {
+ st[entry.first] = entry.second.root();
+ }
+
+ NodePtr n = makeNode(e, st, "");
+ return ValidSchema(n);
+}
+
} // namespace avro
diff --git a/lang/c++/include/avro/Compiler.hh b/lang/c++/include/avro/Compiler.hh
index bdcbb355e2..911a8aed0c 100644
--- a/lang/c++/include/avro/Compiler.hh
+++ b/lang/c++/include/avro/Compiler.hh
@@ -22,6 +22,7 @@
#include "Config.hh"
#include <cstdint>
#include <istream>
+#include <map>
namespace avro {
@@ -32,6 +33,7 @@ class AVRO_DECL InputStream;
/// lexer object for each parse. The bison parser also uses this class to
/// build up an avro parse tree as the avro spec is parsed.
+class AVRO_DECL Name;
class AVRO_DECL ValidSchema;
/// Given a stream containing a JSON schema, compiles the schema to a
@@ -58,6 +60,9 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
+AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
+ const std::map<Name, ValidSchema> &namedReferences);
+
} // namespace avro
#endif
diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc
index 6fa5344ac1..072b977566 100644
--- a/lang/c++/test/CompilerTests.cc
+++ b/lang/c++/test/CompilerTests.cc
@@ -22,6 +22,7 @@
#include <boost/test/unit_test.hpp>
#include "Compiler.hh"
+#include "Node.hh"
#include "ValidSchema.hh"
// Assert that empty defaults don't make json schema compilation violate bounds
@@ -82,6 +83,66 @@ void test2dArray() {
BOOST_CHECK_EQUAL(expected, actual.str());
}
+void testRecordWithNamedReference() {
+ std::string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}";
+ // The root schema references the nested schema above by name only.
+ // This mimics tools that allow schemas to have references to other schemas.
+ std::string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}";
+
+ // First compile the nested schema
+ avro::ValidSchema nestedRecord = avro::compileJsonSchemaFromString(nestedSchema);
+
+ // Create a map of named references
+ std::map<avro::Name, avro::ValidSchema> namedReferences;
+ namedReferences[avro::Name("NestedRecord")] = nestedRecord;
+
+ // Parse the root schema with named references
+ std::istringstream rootSchemaStream(rootSchema);
+ avro::ValidSchema rootRecord = avro::compileJsonSchemaWithNamedReferences(rootSchemaStream, namedReferences);
+
+ // Verify the schema was compiled correctly
+ BOOST_CHECK_EQUAL("RootRecord", rootRecord.root()->name().simpleName());
+
+ // Get the nested field and verify its type
+ const avro::NodePtr &rootNode = rootRecord.root();
+ BOOST_CHECK_EQUAL(avro::AVRO_RECORD, rootNode->type());
+ BOOST_CHECK_EQUAL(1, rootNode->leaves());
+
+ const avro::NodePtr &nestedFieldNode = rootNode->leafAt(0);
+ BOOST_CHECK_EQUAL("NestedRecord", nestedFieldNode->name().simpleName());
+}
+
+// Verify recursive schemas don't create shared_ptr cycles by ensuring the
+// root node expires once the ValidSchema goes out of scope. Example: binary
+// tree node with left/right as union of null and the node type itself.
+void testRecursiveBinaryTreeWeakPtrExpires() {
+ std::weak_ptr<avro::Node> weakRoot;
+
+ {
+ const std::string schema = R"({
+ "type": "record",
+ "name": "Node",
+ "fields": [
+ {"name": "value", "type": "int"},
+ {"name": "left", "type": ["null", "Node"], "default": null},
+ {"name": "right", "type": ["null", "Node"], "default": null}
+ ]
+ })";
+
+ avro::ValidSchema s = avro::compileJsonSchemaFromString(schema);
+ // Capture a weak reference to the root node while the schema is alive.
+ weakRoot = s.root();
+
+ // Optionally exercise the schema to ensure validation completed.
+ BOOST_CHECK_EQUAL(avro::AVRO_RECORD, s.root()->type());
+ BOOST_CHECK_EQUAL("Node", s.root()->name().simpleName());
+ }
+
+ // After the ValidSchema (and any strong references) go out of scope,
+ // the weak pointer must not be lockable if there are no cycles.
+ BOOST_CHECK(weakRoot.expired());
+}
+
boost::unit_test::test_suite *
init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
using namespace boost::unit_test;
@@ -89,5 +150,7 @@ init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for Compiler.cc");
ts->add(BOOST_TEST_CASE(&testEmptyBytesDefault));
ts->add(BOOST_TEST_CASE(&test2dArray));
+ ts->add(BOOST_TEST_CASE(&testRecordWithNamedReference));
+ ts->add(BOOST_TEST_CASE(&testRecursiveBinaryTreeWeakPtrExpires));
return ts;
}