llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangd @llvm/pr-subscribers-clang-tools-extra Author: None (tcottin) <details> <summary>Changes</summary> PR for some followup issues from clangd/clangd#<!-- -->529. `\note` and `\warning`: In the hover card, they are now displayed with heading and enclosing rulers. `\retval` commands: Each `\retval` command is now a bullet point under the return section of the hover card. --- Patch is 36.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156365.diff 6 Files Affected: - (modified) clang-tools-extra/clangd/Hover.cpp (+2-4) - (modified) clang-tools-extra/clangd/SymbolDocumentation.cpp (+214-48) - (modified) clang-tools-extra/clangd/SymbolDocumentation.h (+31-51) - (modified) clang-tools-extra/clangd/support/Markup.cpp (+27-2) - (modified) clang-tools-extra/clangd/unittests/HoverTests.cpp (+39-4) - (modified) clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp (+509-10) ``````````diff diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index 9eec322fe5963..3b9baed1c2838 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -1589,13 +1589,11 @@ markup::Document HoverInfo::presentDoxygen() const { P.appendText(" - "); SymbolDoc.returnToMarkup(P); } + + SymbolDoc.retvalsToMarkup(Output); Output.addRuler(); } - // add specially handled doxygen commands. - SymbolDoc.warningsToMarkup(Output); - SymbolDoc.notesToMarkup(Output); - // add any other documentation. SymbolDoc.docToMarkup(Output); diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp index 9ae1ef3f828e0..ee67f9ae66443 100644 --- a/clang-tools-extra/clangd/SymbolDocumentation.cpp +++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp @@ -13,6 +13,7 @@ #include "clang/AST/CommentCommandTraits.h" #include "clang/AST/CommentVisitor.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" namespace clang { @@ -34,10 +35,20 @@ void commandToMarkup(markup::Paragraph &Out, StringRef Command, StringRef Args) { Out.appendBoldText(commandMarkerAsString(CommandMarker) + Command.str()); Out.appendSpace(); - if (!Args.empty()) { - Out.appendEmphasizedText(Args.str()); + if (!Args.empty()) + Out.appendCode(Args.str()); +} + +template <typename T> std::string getArgText(const T *Command) { + std::string ArgText; + for (unsigned I = 0; I < Command->getNumArgs(); ++I) { + if (!ArgText.empty()) + ArgText += " "; + ArgText += Command->getArgText(I); } + return ArgText; } + } // namespace class ParagraphToMarkupDocument @@ -70,12 +81,7 @@ class ParagraphToMarkupDocument void visitInlineCommandComment(const comments::InlineCommandComment *C) { if (C->getNumArgs() > 0) { - std::string ArgText; - for (unsigned I = 0; I < C->getNumArgs(); ++I) { - if (!ArgText.empty()) - ArgText += " "; - ArgText += C->getArgText(I); - } + std::string ArgText = getArgText(C); switch (C->getRenderKind()) { case comments::InlineCommandRenderKind::Monospaced: @@ -158,10 +164,9 @@ class ParagraphToString void visitInlineCommandComment(const comments::InlineCommandComment *C) { Out << commandMarkerAsString(C->getCommandMarker()); Out << C->getCommandName(Traits); - if (C->getNumArgs() > 0) { - for (unsigned I = 0; I < C->getNumArgs(); ++I) - Out << " " << C->getArgText(I); - } + std::string ArgText = getArgText(C); + if (!ArgText.empty()) + Out << " " << ArgText; Out << " "; } @@ -210,16 +215,27 @@ class BlockCommentToMarkupDocument Traits) .visit(B->getParagraph()); break; + case comments::CommandTraits::KCI_note: + case comments::CommandTraits::KCI_warning: + commandToHeadedParagraph(B); + break; + case comments::CommandTraits::KCI_retval: { + // The \retval command describes the return value given as its single + // argument in the corresponding paragraph. + // Note: We know that we have exactly one argument but not if it has an + // associated paragraph. + auto &P = Out.addParagraph().appendCode(getArgText(B)); + if (B->getParagraph() && !B->getParagraph()->isWhitespace()) { + P.appendText(" - "); + ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph()); + } + return; + } default: { // Some commands have arguments, like \throws. // The arguments are not part of the paragraph. // We need reconstruct them here. - std::string ArgText; - for (unsigned I = 0; I < B->getNumArgs(); ++I) { - if (!ArgText.empty()) - ArgText += " "; - ArgText += B->getArgText(I); - } + std::string ArgText = getArgText(B); auto &P = Out.addParagraph(); commandToMarkup(P, B->getCommandName(Traits), B->getCommandMarker(), ArgText); @@ -234,7 +250,53 @@ class BlockCommentToMarkupDocument } } + void visitCodeCommand(const comments::VerbatimBlockComment *VB) { + std::string CodeLang = ""; + auto *FirstLine = VB->child_begin(); + // The \\code command has an optional language argument. + // This argument is currently not parsed by the clang doxygen parser. + // Therefore we try to extract it from the first line of the verbatim + // block. + if (VB->getNumLines() > 0) { + if (const auto *Line = + cast<comments::VerbatimBlockLineComment>(*FirstLine)) { + llvm::StringRef Text = Line->getText(); + // Language is a single word enclosed in {}. + if (llvm::none_of(Text, llvm::isSpace) && Text.consume_front("{") && + Text.consume_back("}")) { + // drop a potential . since this is not supported in Markdown + // fenced code blocks. + Text.consume_front("."); + // Language is alphanumeric or '+'. + CodeLang = Text.take_while([](char C) { + return llvm::isAlnum(C) || C == '+'; + }) + .str(); + // Skip the first line for the verbatim text. + ++FirstLine; + } + } + } + + std::string CodeBlockText; + + for (const auto *LI = FirstLine; LI != VB->child_end(); ++LI) { + if (const auto *Line = cast<comments::VerbatimBlockLineComment>(*LI)) { + CodeBlockText += Line->getText().str() + "\n"; + } + } + + Out.addCodeBlock(CodeBlockText, CodeLang); + } + void visitVerbatimBlockComment(const comments::VerbatimBlockComment *VB) { + // The \\code command is a special verbatim block command which we handle + // separately. + if (VB->getCommandID() == comments::CommandTraits::KCI_code) { + visitCodeCommand(VB); + return; + } + commandToMarkup(Out.addParagraph(), VB->getCommandName(Traits), VB->getCommandMarker(), ""); @@ -262,8 +324,125 @@ class BlockCommentToMarkupDocument markup::Document &Out; const comments::CommandTraits &Traits; StringRef CommentEscapeMarker; + + /// Emphasize the given command in a paragraph. + /// Uses the command name with the first letter capitalized as the heading. + void commandToHeadedParagraph(const comments::BlockCommandComment *B) { + Out.addRuler(); + auto &P = Out.addParagraph(); + std::string Heading = B->getCommandName(Traits).slice(0, 1).upper() + + B->getCommandName(Traits).drop_front().str(); + P.appendBoldText(Heading + ":"); + P.appendText(" \n"); + ParagraphToMarkupDocument(P, Traits).visit(B->getParagraph()); + Out.addRuler(); + } }; +void SymbolDocCommentVisitor::preprocessDocumentation(StringRef Doc) { + enum State { + Normal, + FencedCodeblock, + } State = Normal; + std::string CodeFence; + + llvm::raw_string_ostream OS(CommentWithMarkers); + + // The documentation string is processed line by line. + // The raw documentation string does not contain the comment markers + // (e.g. /// or /** */). + // But the comment lexer expects doxygen markers, so add them back. + // We need to use the /// style doxygen markers because the comment could + // contain the closing tag "*/" of a C Style "/** */" comment + // which would break the parsing if we would just enclose the comment text + // with "/** */". + + // Escape doxygen commands inside markdown inline code spans. + // This is required to not let the doxygen parser interpret them as + // commands. + // Note: This is a heuristic which may fail in some cases. + bool InCodeSpan = false; + + llvm::StringRef Line, Rest; + for (std::tie(Line, Rest) = Doc.split('\n'); !(Line.empty() && Rest.empty()); + std::tie(Line, Rest) = Rest.split('\n')) { + + // Detect code fence (``` or ~~~) + if (State == Normal) { + llvm::StringRef Trimmed = Line.ltrim(); + if (Trimmed.starts_with("```") || Trimmed.starts_with("~~~")) { + // https://www.doxygen.nl/manual/markdown.html#md_fenced + CodeFence = + Trimmed.take_while([](char C) { return C == '`' || C == '~'; }) + .str(); + // Try to detect language: first word after fence. Could also be + // enclosed in {} + llvm::StringRef AfterFence = + Trimmed.drop_front(CodeFence.size()).ltrim(); + // ignore '{' at the beginning of the language name to not duplicate it + // for the doxygen command + AfterFence.consume_front("{"); + // The name is alphanumeric or '.' or '+' + StringRef CodeLang = AfterFence.take_while( + [](char C) { return llvm::isAlnum(C) || C == '.' || C == '+'; }); + + OS << "///@code"; + + if (!CodeLang.empty()) + OS << "{" << CodeLang.str() << "}"; + + OS << "\n"; + + State = FencedCodeblock; + continue; + } + + // FIXME: handle indented code blocks too? + // In doxygen, the indentation which triggers a code block depends on the + // indentation of the previous paragraph. + // https://www.doxygen.nl/manual/markdown.html#mddox_code_blocks + } else if (State == FencedCodeblock) { + // End of code fence + if (Line.ltrim().starts_with(CodeFence)) { + OS << "///@endcode\n"; + State = Normal; + continue; + } + OS << "///" << Line << "\n"; + continue; + } + + // Normal line preprocessing (add doxygen markers, handle escaping) + OS << "///"; + + if (Line.empty() || Line.trim().empty()) { + OS << "\n"; + // Empty lines reset the InCodeSpan state. + InCodeSpan = false; + continue; + } + + if (Line.starts_with("<")) + // A comment line starting with '///<' is treated as a doxygen + // command. To avoid this, we add a space before the '<'. + OS << ' '; + + for (char C : Line) { + if (C == '`') + InCodeSpan = !InCodeSpan; + else if (InCodeSpan && (C == '@' || C == '\\')) + OS << '\\'; + OS << C; + } + + OS << "\n"; + } + + // Close any unclosed code block + if (State == FencedCodeblock) + OS << "///@endcode\n"; +} + void SymbolDocCommentVisitor::visitBlockCommandComment( const comments::BlockCommandComment *B) { switch (B->getCommandID()) { @@ -282,36 +461,22 @@ void SymbolDocCommentVisitor::visitBlockCommandComment( } break; case comments::CommandTraits::KCI_retval: - RetvalParagraphs.push_back(B->getParagraph()); - return; - case comments::CommandTraits::KCI_warning: - WarningParagraphs.push_back(B->getParagraph()); - return; - case comments::CommandTraits::KCI_note: - NoteParagraphs.push_back(B->getParagraph()); + // Only consider retval commands having an argument. + // The argument contains the described return value which is needed to + // convert it to markup. + if (B->getNumArgs() == 1) + RetvalCommands.push_back(B); return; default: break; } - // For all other commands, we store them in the UnhandledCommands map. + // For all other commands, we store them in the BlockCommands map. // This allows us to keep the order of the comments. - UnhandledCommands[CommentPartIndex] = B; + BlockCommands[CommentPartIndex] = B; CommentPartIndex++; } -void SymbolDocCommentVisitor::paragraphsToMarkup( - markup::Document &Out, - const llvm::SmallVectorImpl<const comments::ParagraphComment *> &Paragraphs) - const { - if (Paragraphs.empty()) - return; - - for (const auto *P : Paragraphs) { - ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P); - } -} - void SymbolDocCommentVisitor::briefToMarkup(markup::Paragraph &Out) const { if (!BriefParagraph) return; @@ -324,14 +489,6 @@ void SymbolDocCommentVisitor::returnToMarkup(markup::Paragraph &Out) const { ParagraphToMarkupDocument(Out, Traits).visit(ReturnParagraph); } -void SymbolDocCommentVisitor::notesToMarkup(markup::Document &Out) const { - paragraphsToMarkup(Out, NoteParagraphs); -} - -void SymbolDocCommentVisitor::warningsToMarkup(markup::Document &Out) const { - paragraphsToMarkup(Out, WarningParagraphs); -} - void SymbolDocCommentVisitor::parameterDocToMarkup( StringRef ParamName, markup::Paragraph &Out) const { if (ParamName.empty()) @@ -354,7 +511,7 @@ void SymbolDocCommentVisitor::parameterDocToString( void SymbolDocCommentVisitor::docToMarkup(markup::Document &Out) const { for (unsigned I = 0; I < CommentPartIndex; ++I) { - if (const auto *BC = UnhandledCommands.lookup(I)) { + if (const auto *BC = BlockCommands.lookup(I)) { BlockCommentToMarkupDocument(Out, Traits).visit(BC); } else if (const auto *P = FreeParagraphs.lookup(I)) { ParagraphToMarkupDocument(Out.addParagraph(), Traits).visit(P); @@ -382,5 +539,14 @@ void SymbolDocCommentVisitor::templateTypeParmDocToString( } } +void SymbolDocCommentVisitor::retvalsToMarkup(markup::Document &Out) const { + if (RetvalCommands.empty()) + return; + markup::BulletList &BL = Out.addBulletList(); + for (const auto *P : RetvalCommands) { + BlockCommentToMarkupDocument(BL.addItem(), Traits).visit(P); + } +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SymbolDocumentation.h b/clang-tools-extra/clangd/SymbolDocumentation.h index b0d3428dfce20..825c9a296df70 100644 --- a/clang-tools-extra/clangd/SymbolDocumentation.h +++ b/clang-tools-extra/clangd/SymbolDocumentation.h @@ -21,6 +21,7 @@ #include "clang/AST/CommentSema.h" #include "clang/AST/CommentVisitor.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include <string> @@ -51,31 +52,8 @@ class SymbolDocCommentVisitor CommentWithMarkers.reserve(Documentation.size() + Documentation.count('\n') * 3); - // The comment lexer expects doxygen markers, so add them back. - // We need to use the /// style doxygen markers because the comment could - // contain the closing the closing tag "*/" of a C Style "/** */" comment - // which would break the parsing if we would just enclose the comment text - // with "/** */". - CommentWithMarkers = "///"; - bool NewLine = true; - for (char C : Documentation) { - if (C == '\n') { - CommentWithMarkers += "\n///"; - NewLine = true; - } else { - if (NewLine && (C == '<')) { - // A comment line starting with '///<' is treated as a doxygen - // comment. Therefore add a space to separate the '<' from the comment - // marker. This allows to parse html tags at the beginning of a line - // and the escape marker prevents adding the artificial space in the - // markup documentation. The extra space will not be rendered, since - // we render it as markdown. - CommentWithMarkers += ' '; - } - CommentWithMarkers += C; - NewLine = false; - } - } + preprocessDocumentation(Documentation); + SourceManagerForFile SourceMgrForFile("mock_file.cpp", CommentWithMarkers); SourceManager &SourceMgr = SourceMgrForFile.get(); @@ -114,22 +92,14 @@ class SymbolDocCommentVisitor bool hasReturnCommand() const { return ReturnParagraph; } - bool hasRetvalCommands() const { return !RetvalParagraphs.empty(); } - - bool hasNoteCommands() const { return !NoteParagraphs.empty(); } - - bool hasWarningCommands() const { return !WarningParagraphs.empty(); } - /// Converts all unhandled comment commands to a markup document. void docToMarkup(markup::Document &Out) const; /// Converts the "brief" command(s) to a markup document. void briefToMarkup(markup::Paragraph &Out) const; /// Converts the "return" command(s) to a markup document. void returnToMarkup(markup::Paragraph &Out) const; - /// Converts the "note" command(s) to a markup document. - void notesToMarkup(markup::Document &Out) const; - /// Converts the "warning" command(s) to a markup document. - void warningsToMarkup(markup::Document &Out) const; + /// Converts the "retval" command(s) to a markup document. + void retvalsToMarkup(markup::Document &Out) const; void visitBlockCommandComment(const comments::BlockCommandComment *B); @@ -157,6 +127,27 @@ class SymbolDocCommentVisitor TemplateParameters[TP->getParamNameAsWritten()] = std::move(TP); } + /// \brief Preprocesses the raw documentation string to prepare it for doxygen + /// parsing. + /// + /// This is a workaround to provide better support for markdown in + /// doxygen. Clang's doxygen parser e.g. does not handle markdown code blocks. + /// + /// The documentation string is preprocessed to replace some markdown + /// constructs with parsable doxygen commands. E.g. markdown code blocks are + /// replaced with doxygen \\code{.lang} ... + /// \\endcode blocks. + /// + /// Additionally, potential doxygen commands inside markdown + /// inline code spans are escaped to avoid that doxygen tries to interpret + /// them as commands. + /// + /// \note Although this is a workaround, it is very similar to what + /// doxygen itself does for markdown. In doxygen, the first parsing step is + /// also a markdown preprocessing step. + /// See https://www.doxygen.nl/manual/markdown.html + void preprocessDocumentation(StringRef Doc); + private: comments::CommandTraits Traits; llvm::BumpPtrAllocator Allocator; @@ -173,19 +164,13 @@ class SymbolDocCommentVisitor /// Paragraph of the "return" command. const comments::ParagraphComment *ReturnParagraph = nullptr; - /// Paragraph(s) of the "note" command(s) - llvm::SmallVector<const comments::ParagraphComment *> RetvalParagraphs; - - /// Paragraph(s) of the "note" command(s) - llvm::SmallVector<const comments::ParagraphComment *> NoteParagraphs; + /// All the "retval" command(s) + llvm::SmallVector<const comments::BlockCommandComment *> RetvalCommands; - /// Paragraph(s) of the "warning" command(s) - llvm::SmallVector<const comments::ParagraphComment *> WarningParagraphs; - - /// All the paragraphs we don't have any special handling for, - /// e.g. "details". + /// All the parsed doxygen block commands. + /// They might have special handling internally like \\note or \\warning llvm::SmallDenseMap<unsigned, const comments::BlockCommandComment *> - UnhandledCommands; + BlockCommands; /// Parsed paragaph(s) of the "param" comamnd(s) llvm::SmallDenseMap<StringRef, const comments::ParamCommandComment *> @@ -198,11 +183,6 @@ class SymbolDocCommentVisitor /// All "free" text paragraphs. llvm::SmallDenseMap<unsigned, const comments::ParagraphComment *> FreeParagraphs; - - void paragraphsToMarkup( - markup::Document &Out, - const llvm::SmallVectorImpl<const comments::ParagraphComment *> - &Paragraphs) const; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp index 89bdc656d440f..304917de252bf 100644 --- a/clang-tools-extra/clangd/support/Markup.cpp +++ b/clang-tools-extra/clangd/support/Markup.cpp @@ -199,10 +199,16 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After, return needsLeadingEscapeMarkdown(C, After); } -/// Escape a markdown text block. +/// \brief Render text for markdown output. +/// /// If \p EscapeMarkdown is true it ensures the punctuation will not introduce /// any of the markdown constructs. +/// /// Else, markdown syntax is not escaped, only HTML tags and entities. +/// HTML is escaped because usually clients do not support HTML rendering by +/// default. Passing unescaped HTML will therefore often result in not showing +/// the HTML at all. +/// \note In markdown code spans, we do not escape anything. std::string renderText(llvm::StringRef Input, bool StartsLine, bool EscapeMarkdown) { std::string R; @@ -213,6 +219,10 @@ std::string renderText(llvm::StringRef Input, bool StartsLine, bool IsFirstLine = true; + // Inside markdown code spans, we do not escape anything when EscapeMarkdown + // is false. + bool InCodeSpan = false; + for (std::tie(L... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/156365 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
