tom-anders created this revision.
Herald added subscribers: kadircet, arphaman.
Herald added a project: All.
tom-anders added reviewers: nridge, sammccall, kadircet.
tom-anders published this revision for review.
Herald added subscribers: cfe-commits, llvm-commits, MaskRay, ilya-biryukov.
Herald added projects: LLVM, clang-tools-extra.

1/3: Add SymbolDocumentation class to parse Doxygen comments

This commit just adds and tests a new class for Doxygen parsing.
Consumption in Hover and Index will be added in a follow-up commit.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134130

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/SymbolDocumentation.cpp
  clang-tools-extra/clangd/SymbolDocumentation.h
  clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
  clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
  llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn

Index: llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
===================================================================
--- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -117,6 +117,7 @@
     "SemanticHighlighting.cpp",
     "SemanticSelection.cpp",
     "SourceCode.cpp",
+    "SymbolDocumentation.cpp",
     "TUScheduler.cpp",
     "TidyProvider.cpp",
     "URI.cpp",
Index: clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
@@ -0,0 +1,51 @@
+//===-- SymbolDocumentationMatchers.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// GMock matchers for the SymbolDocumentation class
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#include "SymbolDocumentation.h"
+#include "gmock/gmock.h"
+
+namespace clang {
+namespace clangd {
+
+/// Builds a matcher that checks every field of a SymbolDocumentation<S>
+/// against \p Expected. Parameters are matched element-wise, in order, by
+/// their name and description.
+template <class S>
+testing::Matcher<SymbolDocumentation<S>>
+matchesDoc(const SymbolDocumentation<S> &Expected) {
+  using namespace ::testing;
+  using DocT = SymbolDocumentation<S>;
+  using ParamT = ParameterDocumentation<S>;
+
+  std::vector<Matcher<ParamT>> PerParam;
+  for (const ParamT &Want : Expected.Parameters)
+    PerParam.push_back(
+        AllOf(Field("Name", &ParamT::Name, Want.Name),
+              Field("Description", &ParamT::Description, Want.Description)));
+
+  return AllOf(Field("Brief", &DocT::Brief, Expected.Brief),
+               Field("Returns", &DocT::Returns, Expected.Returns),
+               Field("Notes", &DocT::Notes, ElementsAreArray(Expected.Notes)),
+               Field("Warnings", &DocT::Warnings,
+                     ElementsAreArray(Expected.Warnings)),
+               Field("Parameters", &DocT::Parameters,
+                     ElementsAreArray(PerParam)),
+               Field("Description", &DocT::Description, Expected.Description),
+               Field("CommentText", &DocT::CommentText, Expected.CommentText));
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeCompletionStrings.h"
+#include "SymbolDocumentationMatchers.h"
 #include "TestTU.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "gmock/gmock.h"
@@ -65,6 +66,90 @@
             getDeclComment(AST.getASTContext(), findDecl(AST, "X")));
 }
 
+TEST_F(CompletionStringTest, DoxygenParsing) {
+  struct {
+    const char *const Code;
+    const std::function<void(SymbolDocumentationOwned &)> ExpectedBuilder;
+  } Cases[] = {
+      {R"cpp(
+    // Hello world
+    void foo();
+    )cpp",
+       [](SymbolDocumentationOwned &Doc) { Doc.Description = "Hello world"; }},
+      {R"cpp(
+    /*! 
+     * \brief brief
+     * \details details
+     */
+    void foo();
+    )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "\\details details";
+       }},
+      {R"cpp(
+    /** 
+     * @brief brief
+     * @details details
+     * @see somewhere else
+     */
+    void foo();
+    )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "@details details\n\n@see somewhere else";
+       }},
+      {R"cpp(
+    /*! 
+     * @brief brief
+     * @details details
+     * @param foo foodoc
+     * @throws ball at hoop
+     * @note note1
+     * @warning warning1
+     * @note note2
+     * @warning warning2
+     * @param bar bardoc
+     * @return something
+     */
+    void foo();
+    )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "brief";
+         Doc.Description = "@details details\n\n@throws ball at hoop";
+         Doc.Parameters = {{"foo", "foodoc"}, {"bar", "bardoc"}};
+         Doc.Warnings = {"warning1", "warning2"};
+         Doc.Notes = {"note1", "note2"};
+         Doc.Returns = "something";
+       }},
+      {R"cpp(
+    /// @brief Here's \b bold \e italic and \p code
+    int foo;
+    )cpp",
+       [](SymbolDocumentationOwned &Doc) {
+         Doc.Brief = "Here's **bold** *italic* and `code`";
+       }}};
+
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Code);
+
+    auto TU = TestTU::withCode(Case.Code);
+    auto AST = TU.build();
+    auto &Ctx = AST.getASTContext();
+    const auto &Decl = findDecl(AST, "foo");
+
+    // Abort this test on a missing comment instead of dereferencing null.
+    const RawComment *RC = getCompletionComment(Ctx, &Decl);
+    ASSERT_THAT(RC, testing::NotNull());
+
+    SymbolDocumentationOwned ExpectedDoc;
+    ExpectedDoc.CommentText =
+        RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
+    Case.ExpectedBuilder(ExpectedDoc);
+    EXPECT_THAT(parseDoxygenComment(*RC, Ctx, &Decl), matchesDoc(ExpectedDoc));
+  }
+}
+
 TEST_F(CompletionStringTest, MultipleAnnotations) {
   Builder.AddAnnotation("Ano1");
   Builder.AddAnnotation("Ano2");
Index: clang-tools-extra/clangd/SymbolDocumentation.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/SymbolDocumentation.h
@@ -0,0 +1,101 @@
+//===--- SymbolDocumentation.h ==---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Class to parse doxygen comments into a flat structure for consumption
+// in e.g. Hover and Code Completion
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/CommentVisitor.h"
+
+namespace clang {
+namespace clangd {
+
+template <class String> struct ParameterDocumentation {
+  String Name;
+  String Description;
+
+  ParameterDocumentation<llvm::StringRef> toRef() const;
+  ParameterDocumentation<std::string> toOwned() const;
+};
+
+using ParameterDocumentationRef = ParameterDocumentation<llvm::StringRef>;
+using ParameterDocumentationOwned = ParameterDocumentation<std::string>;
+
+/// @brief Represents a parsed doxygen comment.
+/// @details Currently there's special handling for the "brief", "param",
+/// "return", "note" and "warning" commands. The content of all other
+/// paragraphs will be appended to the #Description field.
+/// If you're only interested in the full comment, but with comment
+/// markers stripped, use the #CommentText field.
+/// \tparam String When built from a declaration, we're building the strings
+/// by ourselves, so in this case String==std::string.
+/// However, when storing the contents of this class in the index, we need to
+/// use llvm::StringRef. To convert between std::string and llvm::StringRef
+/// versions of this class, use toRef() and toOwned().
+template <class String> class SymbolDocumentation {
+public:
+  friend class CommentToSymbolDocumentation;
+  /// Creates a doc with Description and CommentText both set to the argument.
+  static SymbolDocumentation<String> descriptionOnly(String &&Description) {
+    SymbolDocumentation<String> Doc;
+    Doc.Description = Description;
+    Doc.CommentText = Description;
+    return Doc;
+  }
+
+  /// Constructs with all fields as empty strings/vectors.
+  SymbolDocumentation() = default;
+
+  SymbolDocumentation<llvm::StringRef> toRef() const;
+  SymbolDocumentation<std::string> toOwned() const;
+
+  bool empty() const { return CommentText.empty(); }
+
+  /// Paragraph of the "brief" command.
+  String Brief;
+
+  /// Paragraph of the "return" command.
+  String Returns;
+
+  /// Paragraph(s) of the "note" command(s)
+  llvm::SmallVector<String, 1> Notes;
+  /// Paragraph(s) of the "warning" command(s)
+  llvm::SmallVector<String, 1> Warnings;
+
+  /// Parsed paragraph(s) of the "param" command(s)
+  llvm::SmallVector<ParameterDocumentation<String>> Parameters;
+
+  /// All the paragraphs we don't have any special handling for,
+  /// e.g. "details".
+  String Description;
+
+  /// The full documentation comment with comment markers stripped.
+  /// See clang::RawComment::getFormattedText() for the detailed
+  /// explanation of how the comment text is transformed.
+  String CommentText;
+};
+
+using SymbolDocumentationOwned = SymbolDocumentation<std::string>;
+using SymbolDocumentationRef = SymbolDocumentation<llvm::StringRef>;
+
+/// @param RC the comment to parse
+/// @param D the declaration that \p RC belongs to
+/// @return parsed doxygen documentation.
+SymbolDocumentationOwned
+parseDoxygenComment(const RawComment &RC, const ASTContext &Ctx, const Decl *D);
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
Index: clang-tools-extra/clangd/SymbolDocumentation.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -0,0 +1,212 @@
+//===--- SymbolDocumentation.cpp ==-------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolDocumentation.h"
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/Support/JSON.h"
+
+namespace clang {
+namespace clangd {
+
+/// Ensures \p Str is valid UTF-8, repairing it in place via fixUTF8() if not.
+static void ensureUTF8(std::string &Str) {
+  if (!llvm::json::isUTF8(Str))
+    Str = llvm::json::fixUTF8(Str);
+}
+
+static void ensureUTF8(llvm::MutableArrayRef<std::string> Strings) {
+  for (auto &Str : Strings)
+    ensureUTF8(Str);
+}
+
+/// Renders a comment node as text and appends it to the string passed to the
+/// constructor; bold/emphasized/monospaced inline commands become markdown.
+class BlockCommentToString
+    : public comments::ConstCommentVisitor<BlockCommentToString> {
+public:
+  BlockCommentToString(std::string &Out, const ASTContext &Ctx)
+      : Out(Out), Ctx(Ctx) {}
+
+  void visitParagraphComment(const comments::ParagraphComment *C) {
+    auto Child = C->child_begin();
+    while (Child != C->child_end())
+      visit(*Child++);
+  }
+
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    const bool IsAt =
+        B->getCommandMarker() == comments::CommandMarkerKind::CMK_At;
+    Out << (IsAt ? '@' : '\\')
+        << B->getCommandName(Ctx.getCommentCommandTraits());
+
+    visit(B->getParagraph());
+  }
+
+  void visitTextComment(const comments::TextComment *C) {
+    // An empty output string means this text opens a paragraph that has no
+    // doxygen command, so its leading space is stripped as well. In every
+    // other case only trailing whitespace is removed.
+    const llvm::StringRef Text = C->getText();
+    Out << (Out.str().empty() ? Text.trim() : Text.rtrim());
+  }
+
+  void visitInlineCommandComment(const comments::InlineCommandComment *C) {
+    const char *Marker = "";
+    switch (C->getRenderKind()) {
+    case comments::InlineCommandComment::RenderKind::RenderMonospaced:
+      Marker = "`";
+      break;
+    case comments::InlineCommandComment::RenderKind::RenderBold:
+      Marker = "**";
+      break;
+    case comments::InlineCommandComment::RenderKind::RenderEmphasized:
+      Marker = "*";
+      break;
+    default:
+      break;
+    }
+
+    Out << " " << Marker;
+    for (unsigned ArgIdx = 0; ArgIdx < C->getNumArgs(); ++ArgIdx)
+      Out << C->getArgText(ArgIdx);
+    Out << Marker;
+  }
+
+private:
+  llvm::raw_string_ostream Out;
+  const ASTContext &Ctx;
+};
+
+/// Sorts the blocks of a parsed doc comment into a SymbolDocumentationOwned.
+class CommentToSymbolDocumentation
+    : public comments::ConstCommentVisitor<CommentToSymbolDocumentation> {
+public:
+  CommentToSymbolDocumentation(const RawComment &RC, const ASTContext &Ctx,
+                               const Decl *D, SymbolDocumentationOwned &Doc)
+      : FullComment(RC.parse(Ctx, nullptr, D)), Output(Doc), Ctx(Ctx) {
+    Doc.CommentText =
+        RC.getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
+    for (auto *Block : FullComment->getBlocks())
+      visit(Block);
+  }
+
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    const auto &Traits = Ctx.getCommentCommandTraits();
+    const auto *Info = Traits.getCommandInfo(B->getCommandID());
+    const llvm::StringRef CommandName = B->getCommandName(Traits);
+
+    // Commands with a dedicated field only store their paragraph, so that the
+    // command name is not part of the string. Brief/returns are matched via
+    // CommandInfo so aliases such as \short, \returns and \result work too.
+    if (Info->IsBriefCommand) {
+      BlockCommentToString(Output.Brief, Ctx).visit(B->getParagraph());
+    } else if (Info->IsReturnsCommand) {
+      BlockCommentToString(Output.Returns, Ctx).visit(B->getParagraph());
+    } else if (CommandName == "warning") {
+      BlockCommentToString(Output.Warnings.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else if (CommandName == "note") {
+      BlockCommentToString(Output.Notes.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else {
+      // Everything else lands in the description, command name included.
+      if (!Output.Description.empty())
+        Output.Description += "\n\n";
+      BlockCommentToString(Output.Description, Ctx).visit(B);
+    }
+  }
+
+  /// Paragraphs without a command are appended to the description.
+  void visitParagraphComment(const comments::ParagraphComment *P) {
+    BlockCommentToString(Output.Description, Ctx).visit(P);
+  }
+
+  void visitParamCommandComment(const comments::ParamCommandComment *P) {
+    if (P->hasParamName() && P->hasNonWhitespaceParagraph()) {
+      ParameterDocumentationOwned Doc;
+      Doc.Name = P->getParamNameAsWritten().str();
+      BlockCommentToString(Doc.Description, Ctx).visit(P->getParagraph());
+      Output.Parameters.push_back(std::move(Doc));
+    }
+  }
+
+private:
+  comments::FullComment *FullComment;
+  SymbolDocumentationOwned &Output;
+  const ASTContext &Ctx;
+};
+
+SymbolDocumentationOwned parseDoxygenComment(const RawComment &RC,
+                                             const ASTContext &Ctx,
+                                             const Decl *D) {
+  SymbolDocumentationOwned Doc;
+  CommentToSymbolDocumentation(RC, Ctx, D, Doc);
+  // The temporary visitor above fills Doc from within its constructor.
+  // Clang requires source to be UTF-8, but doesn't enforce this in comments.
+  ensureUTF8(Doc.Brief);
+  ensureUTF8(Doc.Returns);
+
+  ensureUTF8(Doc.Notes);
+  ensureUTF8(Doc.Warnings);
+
+  for (auto &Param : Doc.Parameters) {
+    ensureUTF8(Param.Name);
+    ensureUTF8(Param.Description);
+  }
+
+  ensureUTF8(Doc.Description);
+  ensureUTF8(Doc.CommentText);
+
+  return Doc;
+}
+
+template struct ParameterDocumentation<std::string>;
+template struct ParameterDocumentation<llvm::StringRef>;
+
+/// Copies every field of \p In into a SymbolDocumentation<StrOut>, building
+/// each StrOut string from the corresponding StrIn string.
+template <class StrOut, class StrIn>
+SymbolDocumentation<StrOut> convert(const SymbolDocumentation<StrIn> &In) {
+  SymbolDocumentation<StrOut> Result;
+
+  // Single-string fields.
+  Result.Brief = In.Brief;
+  Result.Returns = In.Returns;
+  Result.Description = In.Description;
+  Result.CommentText = In.CommentText;
+
+  // List fields are converted element by element.
+  Result.Notes.reserve(In.Notes.size());
+  for (const auto &Text : In.Notes)
+    Result.Notes.emplace_back(Text);
+
+  Result.Warnings.reserve(In.Warnings.size());
+  for (const auto &Text : In.Warnings)
+    Result.Warnings.emplace_back(Text);
+
+  Result.Parameters.reserve(In.Parameters.size());
+  for (const auto &Param : In.Parameters)
+    Result.Parameters.emplace_back(ParameterDocumentation<StrOut>{
+        StrOut(Param.Name), StrOut(Param.Description)});
+
+  return Result;
+}
+
+template <> SymbolDocumentationRef SymbolDocumentationOwned::toRef() const {
+  return convert<llvm::StringRef>(*this);
+}
+
+template <> SymbolDocumentationOwned SymbolDocumentationRef::toOwned() const {
+  return convert<std::string>(*this);
+}
+
+template class SymbolDocumentation<std::string>;
+template class SymbolDocumentation<llvm::StringRef>;
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -97,6 +97,7 @@
   SemanticHighlighting.cpp
   SemanticSelection.cpp
   SourceCode.cpp
+  SymbolDocumentation.cpp
   QueryDriverDatabase.cpp
   TidyProvider.cpp
   TUScheduler.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to