This is an automated email from the ASF dual-hosted git repository.
thiru pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 9893ff7ae AVRO-4035 [C++] Add doc strings to generated classes (#3128)
9893ff7ae is described below
commit 9893ff7ae1fecd4cfaba71bd657d2ccc02796ffe
Author: Gerrit Birkeland <[email protected]>
AuthorDate: Thu Jan 2 20:23:30 2025 -0700
AVRO-4035 [C++] Add doc strings to generated classes (#3128)
* AVRO-4035 [C++] Add doc strings to generated classes
* AVRO-4035: Address review comments
* Fix another edge case in generated doc comments
---
lang/c++/impl/avrogencpp.cc | 33 +++++++++++++++++++++++++++++++--
lang/c++/jsonschemas/bigrecord | 3 ++-
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc
index 143515215..8fa145f81 100644
--- a/lang/c++/impl/avrogencpp.cc
+++ b/lang/c++/impl/avrogencpp.cc
@@ -113,6 +113,7 @@ class CodeGen {
void generateTraits(const NodePtr &n);
void generateRecordTraits(const NodePtr &n);
void generateUnionTraits(const NodePtr &n);
+ void generateDocComment(const NodePtr &n, const char *indent = "");
void emitCopyright();
void emitGeneratedWarning();
@@ -253,6 +254,7 @@ string CodeGen::generateRecordType(const NodePtr &n) {
return it->second;
}
+ generateDocComment(n);
os_ << "struct " << decoratedName << " {\n";
if (!noUnion_) {
for (size_t i = 0; i < c; ++i) {
@@ -271,6 +273,7 @@ string CodeGen::generateRecordType(const NodePtr &n) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
+ generateDocComment(n->leafAt(i), " ");
os_ << " " << types[i];
os_ << ' ' << decoratedNameAt << ";\n";
}
@@ -409,7 +412,7 @@ string CodeGen::generateUnionType(const NodePtr &n) {
for (size_t i = 0; i < c; ++i) {
// escape reserved literals for c++
auto branch_name = decorate(names[i]);
- // avoid rare collisions, e.g. somone might name their struct int_
+ // avoid rare collisions, e.g. someone might name their struct int_
if (used_branch_names.find(branch_name) != used_branch_names.end()) {
size_t postfix = 2;
std::string escaped_name = branch_name + "_" +
std::to_string(postfix);
@@ -739,6 +742,32 @@ void CodeGen::generateTraits(const NodePtr &n) {
}
}
+// Writes the doc string of node `n` to os_ as C++ line comments.
+//
+// Nothing is written when n->getDoc() is empty. Otherwise the text is split
+// on '\n', carriage returns are removed from every line, and each line is
+// emitted as a "//" comment preceded by `indent`.
+//
+// n      - node whose getDoc() text is turned into comments.
+// indent - text written in front of every comment line (the declaration
+//          defaults it to "").
+void CodeGen::generateDocComment(const NodePtr &n, const char *indent) {
+    if (n->getDoc().empty()) {
+        return;
+    }
+
+    std::vector<std::string> lines;
+    boost::algorithm::split(lines, n->getDoc(), boost::algorithm::is_any_of("\n"));
+    for (auto &line : lines) {
+        // Strip carriage returns so "\r\n" doc strings do not leak '\r' into
+        // the generated header.
+        boost::algorithm::replace_all(line, "\r", "");
+
+        if (line.empty()) {
+            os_ << indent << "//\n";
+            continue;
+        }
+
+        // If a comment line ends with a backslash, or a backslash followed
+        // only by whitespace, the generated code would trigger multi-line
+        // comment warnings on GCC. We can't just append whitespace here as
+        // escaped newlines ignore trailing whitespace, so visible text is
+        // appended after the backslash instead.
+        //
+        // A backslash is never whitespace, so "the last backslash is at or
+        // after the last non-whitespace character" is the same condition as
+        // "the last non-whitespace character is a backslash".
+        //
+        // The cast goes through unsigned char: passing a negative char value
+        // (any byte >= 0x80 where char is signed) to std::isspace is
+        // undefined behaviour.
+        auto lastNonWs = std::find_if(line.rbegin(), line.rend(), [](char c) {
+            return !std::isspace(static_cast<unsigned char>(c));
+        });
+        if (lastNonWs != line.rend() && *lastNonWs == '\\') {
+            line.append("(backslash)");
+        }
+        os_ << indent << "// " << line << "\n";
+    }
+}
+
void CodeGen::emitCopyright() {
os_ << "/**\n"
" * Licensed to the Apache Software Foundation (ASF) under one\n"
@@ -954,4 +983,4 @@ bool UnionCodeTracker::unionTraitsAlreadyGenerated(const
std::string &unionClass
void UnionCodeTracker::setTraitsGenerated(const std::string &unionClassName) {
generatedUnionTraits_.insert(unionClassName);
-}
\ No newline at end of file
+}
diff --git a/lang/c++/jsonschemas/bigrecord b/lang/c++/jsonschemas/bigrecord
index af8a5ad39..e7fd7fd7b 100644
--- a/lang/c++/jsonschemas/bigrecord
+++ b/lang/c++/jsonschemas/bigrecord
@@ -1,6 +1,6 @@
{
"type": "record",
- "doc": "Top level Doc.",
+ "doc": "Top level Doc.\nWith multiple lines",
"name": "RootRecord",
"fields": [
{
@@ -10,6 +10,7 @@
},
{
"name": "nestedrecord",
+ "doc": "Doc edge cases\r\nwith trailing backslash\\\t \n\\\n\\
\n\\x",
"type": {
"type": "record",
"name": "Nested",