tom-anders created this revision.
Herald added subscribers: kadircet, arphaman.
Herald added a project: All.
tom-anders added reviewers: nridge, sammccall, kadircet.
tom-anders published this revision for review.
Herald added subscribers: cfe-commits, llvm-commits, MaskRay, ilya-biryukov.
Herald added projects: LLVM, clang-tools-extra.
1/3: Add SymbolDocumentation class to parse Doxygen comments
This commit just adds and tests a new class for Doxygen parsing.
Consumption in Hover and Index will be added in a follow-up commit.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D134130
Files:
clang-tools-extra/clangd/CMakeLists.txt
clang-tools-extra/clangd/SymbolDocumentation.cpp
clang-tools-extra/clangd/SymbolDocumentation.h
clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
Index: llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
===================================================================
--- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -117,6 +117,7 @@
"SemanticHighlighting.cpp",
"SemanticSelection.cpp",
"SourceCode.cpp",
+ "SymbolDocumentation.cpp",
"TUScheduler.cpp",
"TidyProvider.cpp",
"URI.cpp",
Index: clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
@@ -0,0 +1,51 @@
+//===-- SymbolDocumentationMatchers.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// GMock matchers for the SymbolDocumentation class
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#include "SymbolDocumentation.h"
+#include "gmock/gmock.h"
+
+namespace clang {
+namespace clangd {
+
+/// Builds a gMock matcher that accepts a SymbolDocumentation whose fields all
+/// equal the corresponding fields of \p Expected. The list-valued fields
+/// (Notes, Warnings, Parameters) are compared element by element, in order.
+template <class S>
+testing::Matcher<SymbolDocumentation<S>>
+matchesDoc(const SymbolDocumentation<S> &Expected) {
+  using namespace ::testing;
+  using DocT = SymbolDocumentation<S>;
+  using ParamT = ParameterDocumentation<S>;
+
+  // One matcher per expected parameter: both name and description must agree.
+  std::vector<Matcher<ParamT>> PerParameter;
+  for (const ParamT &Param : Expected.Parameters) {
+    Matcher<ParamT> NameAndDescription =
+        AllOf(Field("Name", &ParamT::Name, Param.Name),
+              Field("Description", &ParamT::Description, Param.Description));
+    PerParameter.push_back(NameAndDescription);
+  }
+
+  return AllOf(
+      Field("Brief", &DocT::Brief, Expected.Brief),
+      Field("Returns", &DocT::Returns, Expected.Returns),
+      Field("Notes", &DocT::Notes, ElementsAreArray(Expected.Notes)),
+      Field("Warnings", &DocT::Warnings, ElementsAreArray(Expected.Warnings)),
+      Field("Parameters", &DocT::Parameters, ElementsAreArray(PerParameter)),
+      Field("Description", &DocT::Description, Expected.Description),
+      Field("CommentText", &DocT::CommentText, Expected.CommentText));
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "CodeCompletionStrings.h"
+#include "SymbolDocumentationMatchers.h"
#include "TestTU.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "gmock/gmock.h"
@@ -65,6 +66,90 @@
getDeclComment(AST.getASTContext(), findDecl(AST, "X")));
}
+// Checks that parseDoxygenComment() splits a documentation comment into the
+// expected SymbolDocumentation fields. Each case pairs a snippet declaring
+// `foo` with a builder that fills in the fields expected for that snippet;
+// the expected CommentText is always taken from the raw comment itself.
+TEST_F(CompletionStringTest, DoxygenParsing) {
+  struct {
+    const char *const Code;
+    const std::function<void(SymbolDocumentationOwned &)> ExpectedBuilder;
+  } Cases[] = {
+      {R"cpp(
+ // Hello world
+ void foo();
+ )cpp",
+       [](SymbolDocumentationOwned &Doc) { Doc.Description = "Hello world"; }},
+      {R"cpp(
+ /*!
+ * \brief brief
+ * \details details
+ */
+ void foo();
+ )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "\\details details";
+       }},
+      {R"cpp(
+ /**
+ * @brief brief
+ * @details details
+ * @see somewhere else
+ */
+ void foo();
+ )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "@details details\n\n@see somewhere else";
+       }},
+      {R"cpp(
+ /*!
+ * @brief brief
+ * @details details
+ * @param foo foodoc
+ * @throws ball at hoop
+ * @note note1
+ * @warning warning1
+ * @note note2
+ * @warning warning2
+ * @param bar bardoc
+ * @return something
+ */
+ void foo();
+ )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "@details details\n\n@throws ball at hoop";
+         Doc.Parameters = {{"foo", "foodoc"}, {"bar", "bardoc"}};
+         Doc.Warnings = {"warning1", "warning2"};
+         Doc.Notes = {"note1", "note2"};
+         Doc.Returns = "something";
+       }},
+      {R"cpp(
+ /// @brief Here's \b bold \e italic and \p code
+ int foo;
+ )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "Here's **bold** *italic* and `code`";
+       }}};
+
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Code);
+
+    auto TU = TestTU::withCode(Case.Code);
+    auto AST = TU.build();
+    auto &Ctx = AST.getASTContext();
+    const auto &Decl = findDecl(AST, "foo");
+
+    // Fetch the comment once and stop the test right here if there is none:
+    // a non-fatal EXPECT would let the dereferences below crash on null.
+    const RawComment *RC = getCompletionComment(Ctx, &Decl);
+    ASSERT_THAT(RC, testing::NotNull());
+
+    SymbolDocumentationOwned ExpectedDoc;
+    ExpectedDoc.CommentText =
+        RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
+    Case.ExpectedBuilder(ExpectedDoc);
+
+    EXPECT_THAT(parseDoxygenComment(*RC, Ctx, &Decl), matchesDoc(ExpectedDoc));
+  }
+}
+
TEST_F(CompletionStringTest, MultipleAnnotations) {
Builder.AddAnnotation("Ano1");
Builder.AddAnnotation("Ano2");
Index: clang-tools-extra/clangd/SymbolDocumentation.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/SymbolDocumentation.h
@@ -0,0 +1,101 @@
+//===--- SymbolDocumentation.h ==---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Class to parse doxygen comments into a flat structure for consumption
+// in e.g. Hover and Code Completion
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/CommentVisitor.h"
+
+namespace clang {
+namespace clangd {
+
+/// Documentation of a single parameter, filled from a "param" command.
+/// \tparam String the string type used for the fields; the aliases below
+/// instantiate it with llvm::StringRef and std::string.
+template <class String> struct ParameterDocumentation {
+ /// Name of the parameter as written in the comment.
+ String Name;
+ /// Text of the paragraph attached to the "param" command.
+ String Description;
+
+ /// Returns the llvm::StringRef-based instantiation of this type.
+ ParameterDocumentation<llvm::StringRef> toRef() const;
+ /// Returns the std::string-based instantiation of this type.
+ ParameterDocumentation<std::string> toOwned() const;
+};
+
+using ParameterDocumentationRef = ParameterDocumentation<llvm::StringRef>;
+using ParameterDocumentationOwned = ParameterDocumentation<std::string>;
+
+/// @brief Represents a parsed doxygen comment.
+/// @details Currently there's special handling for the "brief", "param",
+/// "return", "note" and "warning" commands. The content of all other
+/// paragraphs will be appended to the #Description field.
+/// If you're only interested in the full comment, but with comment
+/// markers stripped, use the #CommentText field.
+/// \tparam String When built from a declaration, we're building the strings
+/// by ourselves, so in this case String==std::string.
+/// However, when storing the contents of this class in the index, we need to
+/// use llvm::StringRef. To convert between std::string and llvm::StringRef
+/// versions of this class, use toRef() and toOwned().
+template <class String> class SymbolDocumentation {
+public:
+ friend class CommentToSymbolDocumentation;
+
+ /// Creates a documentation object whose #Description and #CommentText are
+ /// both set to \p Description; all other fields are left empty.
+ static SymbolDocumentation<String> descriptionOnly(String &&Description) {
+ SymbolDocumentation<String> Doc;
+ Doc.Description = Description;
+ Doc.CommentText = Description;
+ return Doc;
+ }
+
+ /// Constructs with all fields as empty strings/vectors.
+ SymbolDocumentation() = default;
+
+ /// Returns the llvm::StringRef-based version of this documentation.
+ SymbolDocumentation<llvm::StringRef> toRef() const;
+ /// Returns the std::string-based version of this documentation.
+ SymbolDocumentation<std::string> toOwned() const;
+
+ /// True if #CommentText is empty.
+ bool empty() const { return CommentText.empty(); }
+
+ /// Paragraph of the "brief" command.
+ String Brief;
+
+ /// Paragraph of the "return" command.
+ String Returns;
+
+ /// Paragraph(s) of the "note" command(s)
+ llvm::SmallVector<String, 1> Notes;
+ /// Paragraph(s) of the "warning" command(s)
+ llvm::SmallVector<String, 1> Warnings;
+
+ /// Parsed paragraph(s) of the "param" command(s)
+ llvm::SmallVector<ParameterDocumentation<String>> Parameters;
+
+ /// All the paragraphs we don't have any special handling for,
+ /// e.g. "details".
+ String Description;
+
+ /// The full documentation comment with comment markers stripped.
+ /// See clang::RawComment::getFormattedText() for the detailed
+ /// explanation of how the comment text is transformed.
+ String CommentText;
+};
+
+using SymbolDocumentationOwned = SymbolDocumentation<std::string>;
+using SymbolDocumentationRef = SymbolDocumentation<llvm::StringRef>;
+
+/// Parses a documentation comment into a SymbolDocumentationOwned.
+/// @param RC the comment to parse
+/// @param Ctx the AST context; supplies the source manager, diagnostics and
+/// comment command traits used while parsing
+/// @param D the declaration that \p RC belongs to
+/// @return parsed doxygen documentation.
+SymbolDocumentationOwned
+parseDoxygenComment(const RawComment &RC, const ASTContext &Ctx, const Decl *D);
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
Index: clang-tools-extra/clangd/SymbolDocumentation.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -0,0 +1,212 @@
+//===--- SymbolDocumentation.cpp ==-------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolDocumentation.h"
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/Support/JSON.h"
+
+namespace clang {
+namespace clangd {
+
+/// Replaces \p Str with the result of llvm::json::fixUTF8() if it is not valid
+/// UTF-8; strings that are already valid are left untouched.
+/// Only used inside this file, hence internal linkage.
+static void ensureUTF8(std::string &Str) {
+  if (!llvm::json::isUTF8(Str))
+    Str = llvm::json::fixUTF8(Str);
+}
+
+/// Applies the single-string ensureUTF8() to every element of \p Strings.
+/// Only used inside this file, hence internal linkage.
+static void ensureUTF8(llvm::MutableArrayRef<std::string> Strings) {
+  for (std::string &Str : Strings)
+    ensureUTF8(Str);
+}
+
+/// Renders a piece of a parsed comment (a paragraph or a whole block command)
+/// as text and appends it to the string passed to the constructor. Inline
+/// commands are wrapped in markdown-style markers.
+class BlockCommentToString
+    : public comments::ConstCommentVisitor<BlockCommentToString> {
+public:
+  BlockCommentToString(std::string &Out, const ASTContext &Ctx)
+      : Out(Out), Ctx(Ctx) {}
+
+  /// A paragraph is rendered by rendering each of its children in order.
+  void visitParagraphComment(const comments::ParagraphComment *C) {
+    for (auto It = C->child_begin(), End = C->child_end(); It != End; ++It)
+      visit(*It);
+  }
+
+  /// Emits the command marker and name, followed by the command's paragraph.
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    const bool UsesAt =
+        B->getCommandMarker() == (comments::CommandMarkerKind::CMK_At);
+    if (UsesAt)
+      Out << '@';
+    else
+      Out << '\\';
+    Out << B->getCommandName(Ctx.getCommentCommandTraits());
+
+    visit(B->getParagraph());
+  }
+
+  /// Emits plain text. While nothing has been written yet the text is trimmed
+  /// on both sides (a paragraph without a doxygen command starts with a
+  /// space); afterwards only trailing whitespace is removed.
+  void visitTextComment(const comments::TextComment *C) {
+    const bool NothingWrittenYet = Out.str().empty();
+    const llvm::StringRef Text = C->getText();
+    if (NothingWrittenYet)
+      Out << Text.trim();
+    else
+      Out << Text.rtrim();
+  }
+
+  /// Emits a space, then the arguments of the inline command surrounded by
+  /// the marker that corresponds to its render kind (none for other kinds).
+  void visitInlineCommandComment(const comments::InlineCommandComment *C) {
+    llvm::StringRef Marker;
+    switch (C->getRenderKind()) {
+    case comments::InlineCommandComment::RenderKind::RenderMonospaced:
+      Marker = "`";
+      break;
+    case comments::InlineCommandComment::RenderKind::RenderBold:
+      Marker = "**";
+      break;
+    case comments::InlineCommandComment::RenderKind::RenderEmphasized:
+      Marker = "*";
+      break;
+    default:
+      Marker = "";
+      break;
+    }
+
+    Out << " " << Marker;
+    for (unsigned ArgIdx = 0, NumArgs = C->getNumArgs(); ArgIdx != NumArgs;
+         ++ArgIdx)
+      Out << C->getArgText(ArgIdx);
+    Out << Marker;
+  }
+
+private:
+  llvm::raw_string_ostream Out;
+  const ASTContext &Ctx;
+};
+
+/// Walks the top-level blocks of a parsed comment and distributes them over
+/// the fields of a SymbolDocumentationOwned. All the work happens in the
+/// constructor: it stores the formatted comment text in CommentText, parses
+/// the comment and visits every block.
+class CommentToSymbolDocumentation
+    : public comments::ConstCommentVisitor<CommentToSymbolDocumentation> {
+public:
+  /// @param RC the raw comment to parse
+  /// @param Ctx AST context providing the source manager, diagnostics and
+  /// command traits
+  /// @param D the declaration \p RC is attached to
+  /// @param Doc the object that receives the result
+  CommentToSymbolDocumentation(const RawComment &RC, const ASTContext &Ctx,
+                               const Decl *D, SymbolDocumentationOwned &Doc)
+      : FullComment(RC.parse(Ctx, nullptr, D)), Output(Doc), Ctx(Ctx) {
+
+    Doc.CommentText =
+        RC.getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
+
+    for (auto *Block : FullComment->getBlocks())
+      visit(Block);
+  }
+
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    const comments::CommandTraits &Traits = Ctx.getCommentCommandTraits();
+    const comments::CommandInfo *Info =
+        Traits.getCommandInfo(B->getCommandID());
+    const llvm::StringRef CommandName = B->getCommandName(Traits);
+
+    // Classify via the command traits rather than the literal spelling, so
+    // that aliases (e.g. "returns"/"result" for "return", "short" for
+    // "brief") end up in the same field as the canonical command.
+    const bool IsBrief =
+        CommandName == "brief" || (Info && Info->IsBriefCommand);
+    const bool IsReturns =
+        CommandName == "return" || (Info && Info->IsReturnsCommand);
+
+    // Visit B->getParagraph() for commands that we have special fields for,
+    // so that the command name won't be included in the string.
+    // Otherwise, we want to keep the command name, so visit B itself.
+    if (IsBrief) {
+      BlockCommentToString(Output.Brief, Ctx).visit(B->getParagraph());
+    } else if (IsReturns) {
+      BlockCommentToString(Output.Returns, Ctx).visit(B->getParagraph());
+    } else if (CommandName == "warning") {
+      BlockCommentToString(Output.Warnings.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else if (CommandName == "note") {
+      BlockCommentToString(Output.Notes.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else {
+      if (!Output.Description.empty())
+        Output.Description += "\n\n";
+
+      BlockCommentToString(Output.Description, Ctx).visit(B);
+    }
+  }
+
+  /// Appends a free-standing paragraph to the description.
+  void visitParagraphComment(const comments::ParagraphComment *P) {
+    // Render into a fresh string first: this trims the leading space of the
+    // paragraph and lets us skip paragraphs that render to nothing.
+    std::string Text;
+    BlockCommentToString(Text, Ctx).visit(P);
+    if (Text.empty())
+      return;
+
+    // Separate from earlier content the same way unhandled block commands
+    // are separated, instead of gluing the paragraphs together.
+    if (!Output.Description.empty())
+      Output.Description += "\n\n";
+    Output.Description += Text;
+  }
+
+  /// Records a "param" command, but only if it names a parameter and has a
+  /// non-whitespace paragraph.
+  void visitParamCommandComment(const comments::ParamCommandComment *P) {
+    if (P->hasParamName() && P->hasNonWhitespaceParagraph()) {
+      ParameterDocumentationOwned Doc;
+      Doc.Name = P->getParamNameAsWritten().str();
+      BlockCommentToString(Doc.Description, Ctx).visit(P->getParagraph());
+      Output.Parameters.push_back(std::move(Doc));
+    }
+  }
+
+private:
+  comments::FullComment *FullComment;
+  SymbolDocumentationOwned &Output;
+  const ASTContext &Ctx;
+};
+
+/// Parses \p RC (attached to \p D) into a SymbolDocumentationOwned and makes
+/// sure that every string of the result is valid UTF-8.
+SymbolDocumentationOwned parseDoxygenComment(const RawComment &RC,
+                                             const ASTContext &Ctx,
+                                             const Decl *D) {
+  SymbolDocumentationOwned Parsed;
+  {
+    // The visitor fills Parsed from within its constructor.
+    CommentToSymbolDocumentation Filler(RC, Ctx, D, Parsed);
+    (void)Filler;
+  }
+
+  // Clang requires source to be UTF-8, but doesn't enforce this in comments.
+  // Single-string fields first ...
+  ensureUTF8(Parsed.Brief);
+  ensureUTF8(Parsed.Returns);
+  ensureUTF8(Parsed.Description);
+  ensureUTF8(Parsed.CommentText);
+
+  // ... then the list-valued ones.
+  ensureUTF8(Parsed.Notes);
+  ensureUTF8(Parsed.Warnings);
+  for (ParameterDocumentationOwned &Param : Parsed.Parameters) {
+    ensureUTF8(Param.Name);
+    ensureUTF8(Param.Description);
+  }
+
+  return Parsed;
+}
+
+// ParameterDocumentation declares toRef() and toOwned(); define them ahead of
+// the explicit instantiations so that both instantiations provide both
+// conversions and callers do not hit undefined symbols at link time.
+
+/// Returns a copy whose fields are llvm::StringRefs onto this object's fields.
+template <class String>
+ParameterDocumentationRef ParameterDocumentation<String>::toRef() const {
+  return {llvm::StringRef(Name), llvm::StringRef(Description)};
+}
+
+/// Returns a copy whose fields are std::strings holding this object's text.
+template <class String>
+ParameterDocumentationOwned ParameterDocumentation<String>::toOwned() const {
+  return {std::string(Name), std::string(Description)};
+}
+
+template struct ParameterDocumentation<std::string>;
+template struct ParameterDocumentation<llvm::StringRef>;
+
+/// Copies every field of \p In into a SymbolDocumentation that uses \p StrOut
+/// as its string type, converting the strings one by one.
+template <class StrOut, class StrIn>
+SymbolDocumentation<StrOut> convert(const SymbolDocumentation<StrIn> &In) {
+  SymbolDocumentation<StrOut> Converted;
+
+  // Element-wise conversion of a list of strings into an (empty) target list.
+  const auto ConvertList = [](const auto &Source, auto &Target) {
+    Target.reserve(Source.size());
+    for (const StrIn &Item : Source)
+      Target.emplace_back(Item);
+  };
+
+  Converted.Brief = In.Brief;
+  Converted.Returns = In.Returns;
+
+  ConvertList(In.Notes, Converted.Notes);
+  ConvertList(In.Warnings, Converted.Warnings);
+
+  Converted.Parameters.reserve(In.Parameters.size());
+  for (const ParameterDocumentation<StrIn> &Param : In.Parameters)
+    Converted.Parameters.push_back(
+        {StrOut(Param.Name), StrOut(Param.Description)});
+
+  Converted.Description = In.Description;
+  Converted.CommentText = In.CommentText;
+
+  return Converted;
+}
+
+// Both conversions are defined for every instantiation (not only
+// Owned -> Ref and Ref -> Owned), so that each member function declared in
+// the class template has a definition once the template is explicitly
+// instantiated below.
+
+/// Returns a version of this documentation that uses llvm::StringRef.
+template <class String>
+SymbolDocumentationRef SymbolDocumentation<String>::toRef() const {
+  return convert<llvm::StringRef>(*this);
+}
+
+/// Returns a version of this documentation that uses std::string.
+template <class String>
+SymbolDocumentationOwned SymbolDocumentation<String>::toOwned() const {
+  return convert<std::string>(*this);
+}
+
+template class SymbolDocumentation<std::string>;
+template class SymbolDocumentation<llvm::StringRef>;
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -97,6 +97,7 @@
SemanticHighlighting.cpp
SemanticSelection.cpp
SourceCode.cpp
+ SymbolDocumentation.cpp
QueryDriverDatabase.cpp
TidyProvider.cpp
TUScheduler.cpp
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits