[clang-tools-extra] Add proper handling of \note, \warning and \retval command + change render kind of command arguments. (PR #156365)

via cfe-commits Mon, 15 Sep 2025 04:43:12 -0700

https://github.com/tcottin updated 
https://github.com/llvm/llvm-project/pull/156365


>From 5e5b87c6985c12b8afeea077a723c969e2153f49 Mon Sep 17 00:00:00 2001
From: Tim Cottin <[email protected]>
Date: Mon, 1 Sep 2025 17:54:53 +0000
Subject: [PATCH 1/2] fix note, warning and retval command. Change arguments of
 doxygen commands to be rendered as monospaced

---
 clang-tools-extra/clangd/Hover.cpp            |   6 +-
 .../clangd/SymbolDocumentation.cpp            | 111 ++++++++-------
 .../clangd/SymbolDocumentation.h              |  33 +----
 .../clangd/unittests/HoverTests.cpp           |  43 +++++-
 .../unittests/SymbolDocumentationTests.cpp    | 126 ++++++++++++++++--
 5 files changed, 227 insertions(+), 92 deletions(-)

diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 9eec322fe5963..3b9baed1c2838 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -1589,13 +1589,11 @@ markup::Document HoverInfo::presentDoxygen() const {
       P.appendText(" - ");
       SymbolDoc.returnToMarkup(P);
     }
+
+    SymbolDoc.retvalsToMarkup(Output);
     Output.addRuler();
   }
 
-  // add specially handled doxygen commands.
-  SymbolDoc.warningsToMarkup(Output);
-  SymbolDoc.notesToMarkup(Output);
-
   // add any other documentation.
   SymbolDoc.docToMarkup(Output);
 
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp
index 9ae1ef3f828e0..86bd6162b6f72 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.cpp
+++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -34,10 +34,20 @@ void commandToMarkup(markup::Paragraph &Out, StringRef Command,
                      StringRef Args) {
   Out.appendBoldText(commandMarkerAsString(CommandMarker) + Command.str());
   Out.appendSpace();
-  if (!Args.empty()) {
-    Out.appendEmphasizedText(Args.str());
+  if (!Args.empty())
+    Out.appendCode(Args.str());
+}
+
+template <typename T> std::string getArgText(const T *Command) {
+  std::string ArgText;
+  for (unsigned I = 0; I < Command->getNumArgs(); ++I) {
+    if (!ArgText.empty())
+      ArgText += " ";
+    ArgText += Command->getArgText(I);
   }
+  return ArgText;
 }
+
 } // namespace
 
 class ParagraphToMarkupDocument
@@ -70,12 +80,7 @@ class ParagraphToMarkupDocument
   void visitInlineCommandComment(const comments::InlineCommandComment *C) {
 
     if (C->getNumArgs() > 0) {
-      std::string ArgText;
-      for (unsigned I = 0; I < C->getNumArgs(); ++I) {
-        if (!ArgText.empty())
-          ArgText += " ";
-        ArgText += C->getArgText(I);
-      }
+      std::string ArgText = getArgText(C);
 
       switch (C->getRenderKind()) {
       case comments::InlineCommandRenderKind::Monospaced:
@@ -158,10 +163,9 @@ class ParagraphToString
   void visitInlineCommandComment(const comments::InlineCommandComment *C) {
     Out << commandMarkerAsString(C->getCommandMarker());
     Out << C->getCommandName(Traits);
-    if (C->getNumArgs() > 0) {
-      for (unsigned I = 0; I < C->getNumArgs(); ++I)
-        Out << " " << C->getArgText(I);
-    }
+    std::string ArgText = getArgText(C);
+    if (!ArgText.empty())
+      Out << " " << ArgText;
     Out << " ";
   }
 
@@ -210,16 +214,27 @@ class BlockCommentToMarkupDocument
                                 Traits)
           .visit(B->getParagraph());
       break;
+    case comments::CommandTraits::KCI_note:
+    case comments::CommandTraits::KCI_warning:
+      commandToHeadedParagraph(B);
+      break;
+    case comments::CommandTraits::KCI_retval: {
+      // The \retval command describes the return value given as its single
+      // argument in the corresponding paragraph.
+      // Note: We know that we have exactly one argument but not if it has an
+      // associated paragraph.
+      auto &P = Out.addParagraph().appendCode(getArgText(B));
+      if (B->getParagraph() && !B->getParagraph()->isWhitespace()) {
+        P.appendText(" - ");
+        ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph());
+      }
+      return;
+    }
     default: {
       // Some commands have arguments, like \throws.
       // The arguments are not part of the paragraph.
       // We need reconstruct them here.
-      std::string ArgText;
-      for (unsigned I = 0; I < B->getNumArgs(); ++I) {
-        if (!ArgText.empty())
-          ArgText += " ";
-        ArgText += B->getArgText(I);
-      }
+      std::string ArgText = getArgText(B);
       auto &P = Out.addParagraph();
       commandToMarkup(P, B->getCommandName(Traits), B->getCommandMarker(),
                       ArgText);
@@ -262,6 +277,19 @@ class BlockCommentToMarkupDocument
   markup::Document &Out;
   const comments::CommandTraits &Traits;
   StringRef CommentEscapeMarker;
+
+  /// Emphasize the given command in a paragraph.
+  /// Uses the command name with the first letter capitalized as the heading.
+  void commandToHeadedParagraph(const comments::BlockCommandComment *B) {
+    Out.addRuler();
+    auto &P = Out.addParagraph();
+    std::string Heading = B->getCommandName(Traits).slice(0, 1).upper() +
+                          B->getCommandName(Traits).drop_front().str();
+    P.appendBoldText(Heading + ":");
+    P.appendText("  \n");
+    ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph());
+    Out.addRuler();
+  }
 };
 
 void SymbolDocCommentVisitor::visitBlockCommandComment(
@@ -282,36 +310,22 @@ void SymbolDocCommentVisitor::visitBlockCommandComment(
     }
     break;
   case comments::CommandTraits::KCI_retval:
-    RetvalParagraphs.push_back(B->getParagraph());
-    return;
-  case comments::CommandTraits::KCI_warning:
-    WarningParagraphs.push_back(B->getParagraph());
-    return;
-  case comments::CommandTraits::KCI_note:
-    NoteParagraphs.push_back(B->getParagraph());
+    // Only consider retval commands having an argument.
+    // The argument contains the described return value which is needed to
+    // convert it to markup.
+    if (B->getNumArgs() == 1)
+      RetvalCommands.push_back(B);
     return;
   default:
     break;
   }
 
-  // For all other commands, we store them in the UnhandledCommands map.
+  // For all other commands, we store them in the BlockCommands map.
   // This allows us to keep the order of the comments.
-  UnhandledCommands[CommentPartIndex] = B;
+  BlockCommands[CommentPartIndex] = B;
   CommentPartIndex++;
 }
 
-void SymbolDocCommentVisitor::paragraphsToMarkup(
-    markup::Document &Out,
-    const llvm::SmallVectorImpl<const comments::ParagraphComment *> &Paragraphs)
-    const {
-  if (Paragraphs.empty())
-    return;
-
-  for (const auto *P : Paragraphs) {
-    ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P);
-  }
-}
-
 void SymbolDocCommentVisitor::briefToMarkup(markup::Paragraph &Out) const {
   if (!BriefParagraph)
     return;
@@ -324,14 +338,6 @@ void SymbolDocCommentVisitor::returnToMarkup(markup::Paragraph &Out) const {
   ParagraphToMarkupDocument(Out, Traits).visit(ReturnParagraph);
 }
 
-void SymbolDocCommentVisitor::notesToMarkup(markup::Document &Out) const {
-  paragraphsToMarkup(Out, NoteParagraphs);
-}
-
-void SymbolDocCommentVisitor::warningsToMarkup(markup::Document &Out) const {
-  paragraphsToMarkup(Out, WarningParagraphs);
-}
-
 void SymbolDocCommentVisitor::parameterDocToMarkup(
     StringRef ParamName, markup::Paragraph &Out) const {
   if (ParamName.empty())
@@ -354,7 +360,7 @@ void SymbolDocCommentVisitor::parameterDocToString(
 
 void SymbolDocCommentVisitor::docToMarkup(markup::Document &Out) const {
   for (unsigned I = 0; I < CommentPartIndex; ++I) {
-    if (const auto *BC = UnhandledCommands.lookup(I)) {
+    if (const auto *BC = BlockCommands.lookup(I)) {
       BlockCommentToMarkupDocument(Out, Traits).visit(BC);
     } else if (const auto *P = FreeParagraphs.lookup(I)) {
       ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P);
@@ -382,5 +388,14 @@ void SymbolDocCommentVisitor::templateTypeParmDocToString(
   }
 }
 
+void SymbolDocCommentVisitor::retvalsToMarkup(markup::Document &Out) const {
+  if (RetvalCommands.empty())
+    return;
+  markup::BulletList &BL = Out.addBulletList();
+  for (const auto *P : RetvalCommands) {
+    BlockCommentToMarkupDocument(BL.addItem(), Traits).visit(P);
+  }
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.h b/clang-tools-extra/clangd/SymbolDocumentation.h
index b0d3428dfce20..1d48ee6d4d3f9 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.h
+++ b/clang-tools-extra/clangd/SymbolDocumentation.h
@@ -114,22 +114,14 @@ class SymbolDocCommentVisitor
 
   bool hasReturnCommand() const { return ReturnParagraph; }
 
-  bool hasRetvalCommands() const { return !RetvalParagraphs.empty(); }
-
-  bool hasNoteCommands() const { return !NoteParagraphs.empty(); }
-
-  bool hasWarningCommands() const { return !WarningParagraphs.empty(); }
-
   /// Converts all unhandled comment commands to a markup document.
   void docToMarkup(markup::Document &Out) const;
   /// Converts the "brief" command(s) to a markup document.
   void briefToMarkup(markup::Paragraph &Out) const;
   /// Converts the "return" command(s) to a markup document.
   void returnToMarkup(markup::Paragraph &Out) const;
-  /// Converts the "note" command(s) to a markup document.
-  void notesToMarkup(markup::Document &Out) const;
-  /// Converts the "warning" command(s) to a markup document.
-  void warningsToMarkup(markup::Document &Out) const;
+  /// Converts the "retval" command(s) to a markup document.
+  void retvalsToMarkup(markup::Document &Out) const;
 
   void visitBlockCommandComment(const comments::BlockCommandComment *B);
 
@@ -173,19 +165,13 @@ class SymbolDocCommentVisitor
   /// Paragraph of the "return" command.
   const comments::ParagraphComment *ReturnParagraph = nullptr;
 
-  /// Paragraph(s) of the "note" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> RetvalParagraphs;
+  /// All the "retval" command(s)
+  llvm::SmallVector<const comments::BlockCommandComment *> RetvalCommands;
 
-  /// Paragraph(s) of the "note" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> NoteParagraphs;
-
-  /// Paragraph(s) of the "warning" command(s)
-  llvm::SmallVector<const comments::ParagraphComment *> WarningParagraphs;
-
-  /// All the paragraphs we don't have any special handling for,
-  /// e.g. "details".
+  /// All the parsed doxygen block commands.
+  /// They might have special handling internally like \\note or \\warning
   llvm::SmallDenseMap<unsigned, const comments::BlockCommandComment *>
-      UnhandledCommands;
+      BlockCommands;
 
   /// Parsed paragaph(s) of the "param" comamnd(s)
   llvm::SmallDenseMap<StringRef, const comments::ParamCommandComment *>
@@ -198,11 +184,6 @@ class SymbolDocCommentVisitor
   /// All "free" text paragraphs.
   llvm::SmallDenseMap<unsigned, const comments::ParagraphComment *>
       FreeParagraphs;
-
-  void paragraphsToMarkup(
-      markup::Document &Out,
-      const llvm::SmallVectorImpl<const comments::ParagraphComment *>
-          &Paragraphs) const;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 743c0dc0d0187..fbcc2992cf2b7 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -4045,9 +4045,19 @@ brief doc
 longer doc)"},
       {[](HoverInfo &HI) {
          HI.Kind = index::SymbolKind::Function;
-         HI.Documentation = "@brief brief doc\n\n"
-                            "longer doc\n@param a this is a param\n@return it "
-                            "returns something";
+         HI.Documentation = R"(@brief brief doc
+
+longer doc
+@note this is a note
+
+As you see, notes are "inlined".
+@warning this is a warning
+
+As well as warnings
+@param a this is a param
+@return it returns something
+@retval 0 if successful
+@retval 1 if failed)";
          HI.Definition = "int foo(int a)";
          HI.ReturnType = "int";
          HI.Name = "foo";
@@ -4068,8 +4078,16 @@ longer doc)"},
 @brief brief doc
 
 longer doc
+@note this is a note
+
+As you see, notes are "inlined".
+@warning this is a warning
+
+As well as warnings
 @param a this is a param
 @return it returns something
+@retval 0 if successful
+@retval 1 if failed
 
 ---
 ```cpp
@@ -4095,8 +4113,25 @@ brief doc
 
 `int` - it returns something
 
+- `0` - if successful
+- `1` - if failed
+
 ---
-longer doc)"},
+longer doc
+
+---
+**Note:**  
+this is a note
+
+---
+As you see, notes are "inlined".
+
+---
+**Warning:**  
+this is a warning
+
+---
+As well as warnings)"},
       {[](HoverInfo &HI) {
          HI.Kind = index::SymbolKind::Function;
          HI.Documentation = "@brief brief doc\n\n"
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index 31a6a149e33c4..ba692399a7093 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -15,7 +15,7 @@
 namespace clang {
 namespace clangd {
 
-TEST(SymbolDocumentation, UnhandledDocs) {
+TEST(SymbolDocumentation, DocToMarkup) {
 
   CommentOptions CommentOpts;
 
@@ -63,15 +63,15 @@ TEST(SymbolDocumentation, UnhandledDocs) {
       },
       {
           "foo \\ref bar baz",
-          "foo \\*\\*\\\\ref\\*\\* \\*bar\\* baz",
-          "foo **\\ref** *bar* baz",
-          "foo **\\ref** *bar* baz",
+          "foo \\*\\*\\\\ref\\*\\* `bar` baz",
+          "foo **\\ref** `bar` baz",
+          "foo **\\ref** bar baz",
       },
       {
           "foo @ref bar baz",
-          "foo \\*\\*@ref\\*\\* \\*bar\\* baz",
-          "foo **@ref** *bar* baz",
-          "foo **@ref** *bar* baz",
+          "foo \\*\\*@ref\\*\\* `bar` baz",
+          "foo **@ref** `bar` baz",
+          "foo **@ref** bar baz",
       },
       {
           "\\brief this is a \\n\nbrief description",
@@ -81,9 +81,9 @@ TEST(SymbolDocumentation, UnhandledDocs) {
       },
       {
           "\\throw exception foo",
-          "\\*\\*\\\\throw\\*\\* \\*exception\\* foo",
-          "**\\throw** *exception* foo",
-          "**\\throw** *exception* foo",
+          "\\*\\*\\\\throw\\*\\* `exception` foo",
+          "**\\throw** `exception` foo",
+          "**\\throw** exception foo",
       },
       {
           R"(\brief this is a brief description
@@ -198,6 +198,72 @@ normal text\<i>this is an italic text\</i>
           "<b>this is a bold text</b> normal text<i>this is an italic text</i> 
"
           "<code>this is a code block</code>",
       },
+      {"@note This is a note",
+       R"(\*\*Note:\*\*  
+This is a note)",
+       R"(**Note:**  
+This is a note)",
+       R"(**Note:**
+This is a note)"},
+      {R"(Paragraph 1
+@note This is a note
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+---
+\*\*Note:\*\*  
+This is a note
+
+---
+Paragraph 2)",
+       R"(Paragraph 1
+
+---
+**Note:**  
+This is a note
+
+---
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Note:**
+This is a note
+
+Paragraph 2)"},
+      {"@warning This is a warning",
+       R"(\*\*Warning:\*\*  
+This is a warning)",
+       R"(**Warning:**  
+This is a warning)",
+       R"(**Warning:**
+This is a warning)"},
+      {R"(Paragraph 1
+@warning This is a warning
+
+Paragraph 2)",
+       R"(Paragraph 1
+
+---
+\*\*Warning:\*\*  
+This is a warning
+
+---
+Paragraph 2)",
+       R"(Paragraph 1
+
+---
+**Warning:**  
+This is a warning
+
+---
+Paragraph 2)",
+       R"(Paragraph 1
+
+**Warning:**
+This is a warning
+
+Paragraph 2)"},
   };
   for (const auto &C : Cases) {
     markup::Document Doc;
@@ -211,5 +277,45 @@ normal text\<i>this is an italic text\</i>
   }
 }
 
+TEST(SymbolDocumentation, RetvalCommand) {
+
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;
+    llvm::StringRef ExpectedRenderEscapedMarkdown;
+    llvm::StringRef ExpectedRenderMarkdown;
+    llvm::StringRef ExpectedRenderPlainText;
+  } Cases[] = {
+      {"@retval", "", "", ""},
+      {R"(@retval MyReturnVal
+@retval MyOtherReturnVal)",
+       R"(- `MyReturnVal`
+- `MyOtherReturnVal`)",
+       R"(- `MyReturnVal`
+- `MyOtherReturnVal`)",
+       R"(- MyReturnVal
+- MyOtherReturnVal)"},
+      {R"(@retval MyReturnVal if foo
+@retval MyOtherReturnVal if bar)",
+       R"(- `MyReturnVal` - if foo
+- `MyOtherReturnVal` - if bar)",
+       R"(- `MyReturnVal` - if foo
+- `MyOtherReturnVal` - if bar)",
+       R"(- MyReturnVal - if foo
+- MyOtherReturnVal - if bar)"},
+  };
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.retvalsToMarkup(Doc);
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
 } // namespace clangd
 } // namespace clang

>From 078294bf08f600262b369a6a855e63eaf004e9be Mon Sep 17 00:00:00 2001
From: Tim Cottin <[email protected]>
Date: Mon, 15 Sep 2025 11:42:02 +0000
Subject: [PATCH 2/2] add markdown preprocessing to handle code blocks and code
 spans correctly

---
 .../clangd/SymbolDocumentation.cpp            | 151 +++++++
 .../clangd/SymbolDocumentation.h              |  49 ++-
 clang-tools-extra/clangd/support/Markup.cpp   |  29 +-
 .../unittests/SymbolDocumentationTests.cpp    | 393 ++++++++++++++++++
 4 files changed, 595 insertions(+), 27 deletions(-)

diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp
index 86bd6162b6f72..ee67f9ae66443 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.cpp
+++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/CommentCommandTraits.h"
 #include "clang/AST/CommentVisitor.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
@@ -249,7 +250,53 @@ class BlockCommentToMarkupDocument
     }
   }
 
+  void visitCodeCommand(const comments::VerbatimBlockComment *VB) {
+    std::string CodeLang = "";
+    auto *FirstLine = VB->child_begin();
+    // The \\code command has an optional language argument.
+    // This argument is currently not parsed by the clang doxygen parser.
+    // Therefore we try to extract it from the first line of the verbatim
+    // block.
+    if (VB->getNumLines() > 0) {
+      if (const auto *Line =
+              cast<comments::VerbatimBlockLineComment>(*FirstLine)) {
+        llvm::StringRef Text = Line->getText();
+        // Language is a single word enclosed in {}.
+        if (llvm::none_of(Text, llvm::isSpace) && Text.consume_front("{") &&
+            Text.consume_back("}")) {
+          // drop a potential . since this is not supported in Markdown
+          // fenced code blocks.
+          Text.consume_front(".");
+          // Language is alphanumeric or '+'.
+          CodeLang = Text.take_while([](char C) {
+                           return llvm::isAlnum(C) || C == '+';
+                         })
+                         .str();
+          // Skip the first line for the verbatim text.
+          ++FirstLine;
+        }
+      }
+    }
+
+    std::string CodeBlockText;
+
+    for (const auto *LI = FirstLine; LI != VB->child_end(); ++LI) {
+      if (const auto *Line = cast<comments::VerbatimBlockLineComment>(*LI)) {
+        CodeBlockText += Line->getText().str() + "\n";
+      }
+    }
+
+    Out.addCodeBlock(CodeBlockText, CodeLang);
+  }
+
   void visitVerbatimBlockComment(const comments::VerbatimBlockComment *VB) {
+    // The \\code command is a special verbatim block command which we handle
+    // separately.
+    if (VB->getCommandID() == comments::CommandTraits::KCI_code) {
+      visitCodeCommand(VB);
+      return;
+    }
+
     commandToMarkup(Out.addParagraph(), VB->getCommandName(Traits),
                     VB->getCommandMarker(), "");
 
@@ -292,6 +339,110 @@ class BlockCommentToMarkupDocument
   }
 };
 
+void SymbolDocCommentVisitor::preprocessDocumentation(StringRef Doc) {
+  enum State {
+    Normal,
+    FencedCodeblock,
+  } State = Normal;
+  std::string CodeFence;
+
+  llvm::raw_string_ostream OS(CommentWithMarkers);
+
+  // The documentation string is processed line by line.
+  // The raw documentation string does not contain the comment markers
+  // (e.g. /// or /** */).
+  // But the comment lexer expects doxygen markers, so add them back.
+  // We need to use the /// style doxygen markers because the comment could
+  // contain the closing tag "*/" of a C Style "/** */" comment
+  // which would break the parsing if we would just enclose the comment text
+  // with "/** */".
+
+  // Escape doxygen commands inside markdown inline code spans.
+  // This is required to not let the doxygen parser interpret them as
+  // commands.
+  // Note: This is a heuristic which may fail in some cases.
+  bool InCodeSpan = false;
+
+  llvm::StringRef Line, Rest;
+  for (std::tie(Line, Rest) = Doc.split('\n'); !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
+
+    // Detect code fence (``` or ~~~)
+    if (State == Normal) {
+      llvm::StringRef Trimmed = Line.ltrim();
+      if (Trimmed.starts_with("```") || Trimmed.starts_with("~~~")) {
+        // https://www.doxygen.nl/manual/markdown.html#md_fenced
+        CodeFence =
+            Trimmed.take_while([](char C) { return C == '`' || C == '~'; })
+                .str();
+        // Try to detect language: first word after fence. Could also be
+        // enclosed in {}
+        llvm::StringRef AfterFence =
+            Trimmed.drop_front(CodeFence.size()).ltrim();
+        // ignore '{' at the beginning of the language name to not duplicate it
+        // for the doxygen command
+        AfterFence.consume_front("{");
+        // The name is alphanumeric or '.' or '+'
+        StringRef CodeLang = AfterFence.take_while(
+            [](char C) { return llvm::isAlnum(C) || C == '.' || C == '+'; });
+
+        OS << "///@code";
+
+        if (!CodeLang.empty())
+          OS << "{" << CodeLang.str() << "}";
+
+        OS << "\n";
+
+        State = FencedCodeblock;
+        continue;
+      }
+
+      // FIXME: handle indented code blocks too?
+      // In doxygen, the indentation which triggers a code block depends on the
+      // indentation of the previous paragraph.
+      // https://www.doxygen.nl/manual/markdown.html#mddox_code_blocks
+    } else if (State == FencedCodeblock) {
+      // End of code fence
+      if (Line.ltrim().starts_with(CodeFence)) {
+        OS << "///@endcode\n";
+        State = Normal;
+        continue;
+      }
+      OS << "///" << Line << "\n";
+      continue;
+    }
+
+    // Normal line preprocessing (add doxygen markers, handle escaping)
+    OS << "///";
+
+    if (Line.empty() || Line.trim().empty()) {
+      OS << "\n";
+      // Empty lines reset the InCodeSpan state.
+      InCodeSpan = false;
+      continue;
+    }
+
+    if (Line.starts_with("<"))
+      // A comment line starting with '///<' is treated as a doxygen
+      // command. To avoid this, we add a space before the '<'.
+      OS << ' ';
+
+    for (char C : Line) {
+      if (C == '`')
+        InCodeSpan = !InCodeSpan;
+      else if (InCodeSpan && (C == '@' || C == '\\'))
+        OS << '\\';
+      OS << C;
+    }
+
+    OS << "\n";
+  }
+
+  // Close any unclosed code block
+  if (State == FencedCodeblock)
+    OS << "///@endcode\n";
+}
+
 void SymbolDocCommentVisitor::visitBlockCommandComment(
     const comments::BlockCommandComment *B) {
   switch (B->getCommandID()) {
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.h b/clang-tools-extra/clangd/SymbolDocumentation.h
index 1d48ee6d4d3f9..825c9a296df70 100644
--- a/clang-tools-extra/clangd/SymbolDocumentation.h
+++ b/clang-tools-extra/clangd/SymbolDocumentation.h
@@ -21,6 +21,7 @@
 #include "clang/AST/CommentSema.h"
 #include "clang/AST/CommentVisitor.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
@@ -51,31 +52,8 @@ class SymbolDocCommentVisitor
     CommentWithMarkers.reserve(Documentation.size() +
                                Documentation.count('\n') * 3);
 
-    // The comment lexer expects doxygen markers, so add them back.
-    // We need to use the /// style doxygen markers because the comment could
-    // contain the closing the closing tag "*/" of a C Style "/** */" comment
-    // which would break the parsing if we would just enclose the comment text
-    // with "/** */".
-    CommentWithMarkers = "///";
-    bool NewLine = true;
-    for (char C : Documentation) {
-      if (C == '\n') {
-        CommentWithMarkers += "\n///";
-        NewLine = true;
-      } else {
-        if (NewLine && (C == '<')) {
-          // A comment line starting with '///<' is treated as a doxygen
-          // comment. Therefore add a space to separate the '<' from the comment
-          // marker. This allows to parse html tags at the beginning of a line
-          // and the escape marker prevents adding the artificial space in the
-          // markup documentation. The extra space will not be rendered, since
-          // we render it as markdown.
-          CommentWithMarkers += ' ';
-        }
-        CommentWithMarkers += C;
-        NewLine = false;
-      }
-    }
+    preprocessDocumentation(Documentation);
+
     SourceManagerForFile SourceMgrForFile("mock_file.cpp", CommentWithMarkers);
 
     SourceManager &SourceMgr = SourceMgrForFile.get();
@@ -149,6 +127,27 @@ class SymbolDocCommentVisitor
     TemplateParameters[TP->getParamNameAsWritten()] = std::move(TP);
   }
 
+  /// \brief Preprocesses the raw documentation string to prepare it for doxygen
+  /// parsing.
+  ///
+  /// This is a workaround to provide better support for markdown in
+  /// doxygen. Clang's doxygen parser e.g. does not handle markdown code blocks.
+  ///
+  /// The documentation string is preprocessed to replace some markdown
+  /// constructs with parsable doxygen commands. E.g. markdown code blocks are
+  /// replaced with doxygen \\code{.lang} ...
+  /// \\endcode blocks.
+  ///
+  /// Additionally, potential doxygen commands inside markdown
+  /// inline code spans are escaped to avoid that doxygen tries to interpret
+  /// them as commands.
+  ///
+  /// \note Although this is a workaround, it is very similar to what
+  /// doxygen itself does for markdown. In doxygen, the first parsing step is
+  /// also a markdown preprocessing step.
+  /// See https://www.doxygen.nl/manual/markdown.html
+  void preprocessDocumentation(StringRef Doc);
+
 private:
   comments::CommandTraits Traits;
   llvm::BumpPtrAllocator Allocator;
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 89bdc656d440f..304917de252bf 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -199,10 +199,16 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
   return needsLeadingEscapeMarkdown(C, After);
 }
 
-/// Escape a markdown text block.
+/// \brief Render text for markdown output.
+///
 /// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
 /// any of the markdown constructs.
+///
 /// Else, markdown syntax is not escaped, only HTML tags and entities.
+/// HTML is escaped because usually clients do not support HTML rendering by
+/// default. Passing unescaped HTML will therefore often result in not showing
+/// the HTML at all.
+/// \note Code spans are left unescaped only when \p EscapeMarkdown is false.
 std::string renderText(llvm::StringRef Input, bool StartsLine,
                        bool EscapeMarkdown) {
   std::string R;
@@ -213,6 +219,10 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
 
   bool IsFirstLine = true;
 
+  // Inside markdown code spans, we do not escape anything when EscapeMarkdown
+  // is false.
+  bool InCodeSpan = false;
+
   for (std::tie(Line, Rest) = Input.split('\n');
        !(Line.empty() && Rest.empty());
        std::tie(Line, Rest) = Rest.split('\n')) {
@@ -226,12 +236,27 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
       R.append(LeadingSpaces);
     }
 
+    // Handle the case where the user escaped a character themselves.
+    // This is relevant for markdown code spans if EscapeMarkdown is false,
+    // because if the user escaped a backtick, we must treat the enclosed text
+    // as normal markdown text.
+    bool UserEscape = false;
     for (unsigned I = LeadingSpaces.size(); I < Line.size(); ++I) {
-      if (needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
+
+      if (!EscapeMarkdown && !UserEscape && Line[I] == '`')
+        InCodeSpan = !InCodeSpan;
+
+      if (!InCodeSpan &&
+          needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
                              Line.substr(I + 1), StartsLineIntern,
                              EscapeMarkdown))
         R.push_back('\\');
       R.push_back(Line[I]);
+
+      if (Line[I] == '\\')
+        UserEscape = !UserEscape;
+      else
+        UserEscape = false;
     }
 
     IsFirstLine = false;
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index ba692399a7093..a2e7f00c933a7 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -317,5 +317,398 @@ TEST(SymbolDocumentation, RetvalCommand) {
   }
 }
 
+TEST(SymbolDocumentation, DoxygenCodeBlocks) { // @code...@endcode rendering
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;                 // doc comment under test
+    llvm::StringRef ExpectedRenderEscapedMarkdown; // vs. asEscapedMarkdown()
+    llvm::StringRef ExpectedRenderMarkdown;        // vs. asMarkdown()
+    llvm::StringRef ExpectedRenderPlainText;       // vs. asPlainText()
+  } Cases[] = {
+      {R"(@code
+int code() { return 0; }
+@endcode
+@code{.cpp}
+int code_lang() { return 0; }
+@endcode
+@code{.c++}
+int code_lang_plus() { return 0; }
+@endcode
+@code{.py}
+class A:
+    pass
+@endcode
+@code{nolang}
+class B:
+    pass
+@endcode)",
+       R"(```
+int code() { return 0; }
+```
+
+```cpp
+int code_lang() { return 0; }
+```
+
+```c++
+int code_lang_plus() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```nolang
+class B:
+    pass
+```)",
+       R"(```
+int code() { return 0; }
+```
+
+```cpp
+int code_lang() { return 0; }
+```
+
+```c++
+int code_lang_plus() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```nolang
+class B:
+    pass
+```)",
+       R"(int code() { return 0; }
+
+int code_lang() { return 0; }
+
+int code_lang_plus() { return 0; }
+
+class A:
+    pass
+
+class B:
+    pass)"},
+  }; // @code{X}: the expected fence tag is X without a leading dot
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.docToMarkup(Doc); // render the doc comment into Doc
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText); // fences dropped
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeBlocks) { // ``` and ~~~ fenced blocks
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;                 // doc comment under test
+    llvm::StringRef ExpectedRenderEscapedMarkdown; // vs. asEscapedMarkdown()
+    llvm::StringRef ExpectedRenderMarkdown;        // vs. asMarkdown()
+    llvm::StringRef ExpectedRenderPlainText;       // vs. asPlainText()
+  } Cases[] = {
+      {R"(```
+int backticks() { return 0; }
+```
+```cpp
+int backticks_lang() { return 0; }
+```
+```c++
+int backticks_lang_plus() { return 0; }
+```
+~~~
+int tilde() { return 0; }
+~~~
+~~~~~~~~~~~~~~~~~~~~~~~~
+int tilde_many() { return 0; }
+~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~{.c++}
+int tilde_many_lang() { return 0; }
+~~~~~~~~~~~~~~~~~~~~~~~~
+```py
+class A:
+    pass
+```
+```python
+class B:
+    pass
+```
+~~~{.python}
+class C:
+    pass
+~~~
+)",
+       R"(```
+int backticks() { return 0; }
+```
+
+```cpp
+int backticks_lang() { return 0; }
+```
+
+```c++
+int backticks_lang_plus() { return 0; }
+```
+
+```
+int tilde() { return 0; }
+```
+
+```
+int tilde_many() { return 0; }
+```
+
+```c++
+int tilde_many_lang() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```python
+class B:
+    pass
+```
+
+```python
+class C:
+    pass
+```)",
+       R"(```
+int backticks() { return 0; }
+```
+
+```cpp
+int backticks_lang() { return 0; }
+```
+
+```c++
+int backticks_lang_plus() { return 0; }
+```
+
+```
+int tilde() { return 0; }
+```
+
+```
+int tilde_many() { return 0; }
+```
+
+```c++
+int tilde_many_lang() { return 0; }
+```
+
+```py
+class A:
+    pass
+```
+
+```python
+class B:
+    pass
+```
+
+```python
+class C:
+    pass
+```)",
+       R"(int backticks() { return 0; }
+
+int backticks_lang() { return 0; }
+
+int backticks_lang_plus() { return 0; }
+
+int tilde() { return 0; }
+
+int tilde_many() { return 0; }
+
+int tilde_many_lang() { return 0; }
+
+class A:
+    pass
+
+class B:
+    pass
+
+class C:
+    pass)"},
+      {R"(```
+// this code block is missing end backticks
+
+)",
+       R"(```
+// this code block is missing end backticks
+```)",
+       R"(```
+// this code block is missing end backticks
+```)",
+       R"(// this code block is missing end backticks)"},
+  }; // last case: an unterminated fence is expected to come out closed
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.docToMarkup(Doc); // render the doc comment into Doc
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText); // fences dropped
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeBlocksSeparation) { // fence after @note
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;                 // doc comment under test
+    llvm::StringRef ExpectedRenderEscapedMarkdown; // vs. asEscapedMarkdown()
+    llvm::StringRef ExpectedRenderMarkdown;        // vs. asMarkdown()
+    llvm::StringRef ExpectedRenderPlainText;       // vs. asPlainText()
+  } Cases[] = {
+      {R"(@note Show that code blocks are correctly separated
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(\*\*Note:\*\*  
+Show that code blocks are correctly separated
+
+---
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**  
+Show that code blocks are correctly separated
+
+---
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**
+Show that code blocks are correctly separated
+
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; })"},
+      {R"(@note Show that code blocks are correctly separated
+~~~~~~~~~
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+~~~~~~~~~)",
+       R"(\*\*Note:\*\*  
+Show that code blocks are correctly separated
+
+---
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**  
+Show that code blocks are correctly separated
+
+---
+```
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; }
+```)",
+       R"(**Note:**
+Show that code blocks are correctly separated
+
+/// Without the markdown preprocessing, this line and the line above would be part of the @note paragraph.
+
+/// With preprocessing, the code block is correctly separated from the @note paragraph.
+/// Also note that without preprocessing, all doxygen commands inside code blocks, like @p would be incorrectly interpreted.
+int function() { return 0; })"},
+  }; // both fence styles (``` and ~~~) expect the same three renders
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.docToMarkup(Doc); // render the doc comment into Doc
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText); // fences dropped
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
+TEST(SymbolDocumentation, MarkdownCodeSpans) { // inline backtick code spans
+  CommentOptions CommentOpts;
+
+  struct Case {
+    llvm::StringRef Documentation;                 // doc comment under test
+    llvm::StringRef ExpectedRenderEscapedMarkdown; // vs. asEscapedMarkdown()
+    llvm::StringRef ExpectedRenderMarkdown;        // vs. asMarkdown()
+    llvm::StringRef ExpectedRenderPlainText;       // vs. asPlainText()
+  } Cases[] = {
+      {R"(`this is a code span with @p and \c inside`)",
+       R"(\`this is a code span with @p and \\c inside\`)",
+       R"(`this is a code span with @p and \c inside`)",
+       R"(`this is a code span with @p and \c inside`)"},
+      {R"(<escaped> `<not-escaped>`)", R"(\<escaped> \`\<not-escaped>\`)",
+       R"(\<escaped> `<not-escaped>`)", R"(<escaped> `<not-escaped>`)"},
+      {R"(<escaped> \`<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)",
+       R"(\<escaped> \\\`\<escaped> doxygen commands not parsed @p, \\c, @note, \\warning \\\`)",
+       R"(\<escaped> \`\<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)",
+       R"(<escaped> \`<escaped> doxygen commands not parsed @p, \c, @note, \warning \`)"},
+      {R"(`multi
+line
+\c span`)",
+       R"(\`multi
+line
+\\c span\`)",
+       R"(`multi
+line
+\c span`)",
+       R"(`multi line
+\c span`)"},
+  }; // in every case the doxygen commands are expected to stay literal text
+  for (const auto &C : Cases) {
+    markup::Document Doc;
+    SymbolDocCommentVisitor SymbolDoc(C.Documentation, CommentOpts);
+
+    SymbolDoc.docToMarkup(Doc); // render the doc comment into Doc
+
+    EXPECT_EQ(Doc.asPlainText(), C.ExpectedRenderPlainText);
+    EXPECT_EQ(Doc.asMarkdown(), C.ExpectedRenderMarkdown);
+    EXPECT_EQ(Doc.asEscapedMarkdown(), C.ExpectedRenderEscapedMarkdown);
+  }
+}
+
 } // namespace clangd
 } // namespace clang

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] Add proper handling of \note, \warning and \retval command + change render kind of command arguments. (PR #156365)

Reply via email to