hokein created this revision.
hokein added reviewers: ilya-biryukov, ioeric.
Herald added subscribers: arphaman, mgrang, jkorous, MaskRay, mgorny.

This patch implements a SymbolOccurrenceCollector, which will be used to:

- Find all occurrences in AST
- Find all occurrences in MemIndex


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D50385

Files:
  clangd/CMakeLists.txt
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/SymbolOccurrenceCollector.cpp
  clangd/index/SymbolOccurrenceCollector.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/SymbolOccurrenceCollectorTests.cpp

Index: unittests/clangd/SymbolOccurrenceCollectorTests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/SymbolOccurrenceCollectorTests.cpp
@@ -0,0 +1,168 @@
+//===-- SymbolOccurrenceCollectorTests.cpp  ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/SymbolOccurrenceCollector.h"
+#include "Annotations.h"
+#include "TestFS.h"
+#include "TestTU.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <memory>
+#include <string>
+
+// Pointwise matcher: a SymbolOccurrence matches a Range with equal endpoints.
+MATCHER(OccurrenceRange, "") {
+  const clang::clangd::SymbolOccurrence &Actual = testing::get<0>(arg);
+  const clang::clangd::Range &Expected = testing::get<1>(arg);
+  const auto &Loc = Actual.Location;
+  return Loc.Start.Line == Expected.start.line &&
+         Loc.Start.Column == Expected.start.character &&
+         Loc.End.Line == Expected.end.line &&
+         Loc.End.Column == Expected.end.character;
+}
+
+namespace clang {
+namespace clangd {
+namespace {
+
+// Creates indexing actions whose results go to a SymbolOccurrenceCollector.
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+public:
+  SymbolIndexActionFactory() = default;
+
+  clang::FrontendAction *create() override {
+    SymbolOccurrenceCollector::Options CollectorOpts;
+    CollectorOpts.IDs = llvm::None; // No whitelist: keep every symbol.
+    CollectorOpts.Filter = SymbolOccurrenceKind::Declaration |
+                           SymbolOccurrenceKind::Definition |
+                           SymbolOccurrenceKind::Reference;
+    Collector = std::make_shared<SymbolOccurrenceCollector>(CollectorOpts);
+    index::IndexingOptions IndexOpts;
+    IndexOpts.IndexFunctionLocals = true;
+    IndexOpts.SystemSymbolFilter =
+        index::IndexingOptions::SystemSymbolFilterKind::All;
+    return index::createIndexingAction(Collector, IndexOpts, nullptr).release();
+  }
+
+  std::shared_ptr<SymbolOccurrenceCollector> Collector;
+};
+
+class OccurrenceCollectorTest : public ::testing::Test {
+public:
+  OccurrenceCollectorTest()
+      : InMemoryFileSystem(new vfs::InMemoryFileSystem),
+        TestHeaderName(testPath("symbol.h")),
+        TestFileName(testPath("symbol.cc")) {
+    TestHeaderURI = URI::createFile(TestHeaderName).toString();
+    TestFileURI = URI::createFile(TestFileName).toString();
+  }
+
+  // Indexes MainCode (with HeaderCode force-included) and stores the result in
+  // Occurrences. Returns false if the tool invocation failed.
+  bool collectOccurrences(StringRef HeaderCode, StringRef MainCode,
+                          const std::vector<std::string> &ExtraArgs = {}) {
+    llvm::IntrusiveRefCntPtr<FileManager> Files(
+        new FileManager(FileSystemOptions(), InMemoryFileSystem));
+    auto Factory = llvm::make_unique<SymbolIndexActionFactory>();
+
+    std::vector<std::string> Args = {
+        "symbol_occurrence_collector", "-fsyntax-only", "-xc++",
+        "-std=c++11",       "-include",      TestHeaderName};
+    // ExtraArgs follow the defaults so that they can override them, e.g.
+    // replace "-xc++" with "-xobjective-c++".
+    Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end());
+    Args.push_back(TestFileName);
+
+    tooling::ToolInvocation Invocation(
+        Args,
+        Factory->create(), Files.get(),
+        std::make_shared<PCHContainerOperations>());
+    InMemoryFileSystem->addFile(TestHeaderName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+    InMemoryFileSystem->addFile(TestFileName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(MainCode));
+    const bool Succeeded = Invocation.run();
+    Occurrences = Factory->Collector->takeOccurrences();
+    return Succeeded;
+  }
+
+protected:
+  llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem;
+  std::string TestHeaderName;
+  std::string TestHeaderURI;
+  std::string TestFileName;
+  std::string TestFileURI;
+  std::unique_ptr<SymbolOccurrenceSlab> Occurrences;
+};
+
+std::vector<Range> operator+(const std::vector<Range> &L,
+                             const std::vector<Range> &R) {
+  std::vector<Range> Joined(L.begin(), L.end());
+  Joined.insert(Joined.end(), R.begin(), R.end()); // Append R after L.
+  return Joined;
+}
+
+TEST_F(OccurrenceCollectorTest, Reference) {
+  Annotations Header(R"(
+  class $foo[[Foo]] {
+  public:
+    $foo[[Foo]]() {}
+    $foo[[Foo]](int);
+  };
+  class $bar[[Bar]];
+  void $func[[func]]();
+  )");
+  Annotations Main(R"(
+  class $bar[[Bar]] {};
+
+  void $func[[func]]();
+
+  void fff() {
+    $foo[[Foo]] foo;
+    $bar[[Bar]] bar;
+    $func[[func]]();
+    int abc = 0;
+    $foo[[Foo]] foo2 = abc;
+  }
+  )");
+  ASSERT_TRUE(collectOccurrences(Header.code(), Main.code()));
+  auto H = TestTU::withHeaderCode(Header.code());
+  auto Symbols = H.headerSymbols();
+  auto Foo = findSymbol(Symbols, "Foo");
+  auto Bar = findSymbol(Symbols, "Bar");
+  auto Func = findSymbol(Symbols, "func");
+  // Each symbol must have exactly the annotated ranges of both files.
+  EXPECT_THAT(
+      Occurrences->find(Foo.ID),
+      testing::UnorderedPointwise(OccurrenceRange(),
+                                  Header.ranges("foo") + Main.ranges("foo")));
+  EXPECT_THAT(
+      Occurrences->find(Bar.ID),
+      testing::UnorderedPointwise(OccurrenceRange(),
+                                  Header.ranges("bar") + Main.ranges("bar")));
+
+  EXPECT_THAT(
+      Occurrences->find(Func.ID),
+      testing::UnorderedPointwise(OccurrenceRange(),
+                                  Header.ranges("func") + Main.ranges("func")));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -27,6 +27,7 @@
   QualityTests.cpp
   SourceCodeTests.cpp
   SymbolCollectorTests.cpp
+  SymbolOccurrenceCollectorTests.cpp
   SyncAPI.cpp
   TUSchedulerTests.cpp
   TestFS.cpp
Index: clangd/index/SymbolOccurrenceCollector.h
===================================================================
--- /dev/null
+++ clangd/index/SymbolOccurrenceCollector.h
@@ -0,0 +1,55 @@
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_OCCURRENCE_COLLECTOR_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_OCCURRENCE_COLLECTOR_H
+
+#include "Index.h"
+#include "llvm/ADT/DenseSet.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/Index/IndexSymbol.h"
+
+namespace clang {
+namespace clangd {
+
+// Index data consumer that records occurrences of symbols found in the AST.
+class SymbolOccurrenceCollector : public index::IndexDataConsumer {
+public:
+  struct Options {
+    // Bitmask of the occurrence kinds to collect. Callers must set it.
+    SymbolOccurrenceKind Filter = SymbolOccurrenceKind::Unknown;
+    // A whitelist of symbols whose occurrences will be collected.
+    // If none, occurrences of all symbols will be collected.
+    llvm::Optional<llvm::DenseSet<SymbolID>> IDs = llvm::None;
+  };
+
+  SymbolOccurrenceCollector(Options Opts)
+      : Opts(std::move(Opts)), Occurrences(new SymbolOccurrenceSlab()) {}
+
+  // Remembers the AST context; it is needed to resolve source locations.
+  void initialize(ASTContext &Ctx) override { ASTCtx = &Ctx; }
+
+  // Records the occurrence if it passes the kind filter and the whitelist.
+  bool
+  handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
+                      ArrayRef<index::SymbolRelation> Relations,
+                      SourceLocation Loc,
+                      index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
+
+  // Freezes the slab, which deduplicates the collected occurrences.
+  void finish() override { Occurrences->freeze(); }
+
+  // Hands the collected occurrences over to the caller.
+  std::unique_ptr<SymbolOccurrenceSlab> takeOccurrences() {
+    return std::move(Occurrences);
+  }
+
+private:
+  ASTContext *ASTCtx = nullptr; // Set by initialize().
+  Options Opts;
+  std::unique_ptr<SymbolOccurrenceSlab> Occurrences;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_OCCURRENCE_COLLECTOR_H
Index: clangd/index/SymbolOccurrenceCollector.cpp
===================================================================
--- /dev/null
+++ clangd/index/SymbolOccurrenceCollector.cpp
@@ -0,0 +1,89 @@
+//===--- SymbolOccurrenceCollector.cpp ---------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolOccurrenceCollector.h"
+#include "clang/Index/USRGeneration.h"
+#include "../AST.h"
+#include "../SourceCode.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+llvm::Optional<SymbolLocation>
+getTokenLocation(SourceLocation TokLoc, const ASTContext* ASTCtx,
+                 std::string &FileURIStorage) {
+  const auto &SrcMgr = ASTCtx->getSourceManager();
+  // Resolve the containing file first; without one there is no location.
+  const auto *File = SrcMgr.getFileEntryForID(SrcMgr.getFileID(TokLoc));
+  if (!File)
+    return llvm::None;
+  auto AbsPath = getAbsoluteFilePath(File, SrcMgr);
+  if (!AbsPath)
+    return llvm::None;
+
+  // Converts a source location into the index's line/column position.
+  auto ToPosition = [&SrcMgr](SourceLocation Loc) {
+    auto LSPPos = sourceLocToPosition(SrcMgr, Loc);
+    SymbolLocation::Position P;
+    P.Line = LSPPos.line;
+    P.Column = LSPPos.character;
+    return P;
+  };
+
+  // The range spans the token that starts at TokLoc.
+  SymbolLocation TokenRange;
+  TokenRange.Start = ToPosition(TokLoc);
+  TokenRange.End = ToPosition(TokLoc.getLocWithOffset(
+      clang::Lexer::MeasureTokenLength(TokLoc, SrcMgr, ASTCtx->getLangOpts())));
+  // FIXME: support custom URIs.
+  FileURIStorage = URI::createFile(*AbsPath).toString();
+  TokenRange.FileURI = FileURIStorage;
+  return TokenRange;
+}
+
+// Maps index roles to the occurrence kinds they contain (bitwise union).
+SymbolOccurrenceKind ToOccurrenceKind(index::SymbolRoleSet Roles) {
+  SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown; // Nothing set yet.
+  for (auto Mask : {SymbolOccurrenceKind::Declaration,
+                    SymbolOccurrenceKind::Definition,
+                    SymbolOccurrenceKind::Reference})
+    if (Roles & static_cast<unsigned>(Mask))
+      Kind |= Mask;
+  return Kind;
+}
+} // namespace
+
+// Records one occurrence of D at Loc unless it is filtered out.
+bool SymbolOccurrenceCollector::handleDeclOccurence(
+    const Decl *D, index::SymbolRoleSet Roles,
+    ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
+    index::IndexDataConsumer::ASTNodeInfo ASTNode) {
+  // Skip implicit declarations and roles not selected by the kind filter.
+  if (D->isImplicit() || !(static_cast<unsigned>(Opts.Filter) & Roles))
+    return true;
+  auto ID = getSymbolID(D);
+  if (!ID)
+    return true;
+  // Hash lookup in the whitelist; llvm::is_contained would scan it linearly.
+  if (Opts.IDs && !Opts.IDs->count(*ID))
+    return true;
+
+  // FileURI backs the location's URI until insert() copies it into the slab.
+  std::string FileURI;
+  if (auto Location = getTokenLocation(Loc, ASTCtx, FileURI)) {
+    SymbolOccurrence Occurrence;
+    Occurrence.Location = *Location;
+    Occurrence.Kind = ToOccurrenceKind(Roles);
+    Occurrences->insert(*ID, Occurrence);
+  }
+  return true;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/StringSaver.h"
 #include <array>
 #include <string>
 
@@ -30,9 +31,6 @@
     uint32_t Line = 0; // 0-based
     // Using UTF-16 code units.
     uint32_t Column = 0; // 0-based
-    bool operator==(const Position& P) const {
-      return Line == P.Line && Column == P.Column;
-    }
   };
 
   // The URI of the source file where a symbol occurs.
@@ -43,11 +41,25 @@
   Position End;
 
   explicit operator bool() const { return !FileURI.empty(); }
-  bool operator==(const SymbolLocation& Loc) const {
-    return std::tie(FileURI, Start, End) ==
-           std::tie(Loc.FileURI, Loc.Start, Loc.End);
-  }
 };
+// Positions are equal when both line and column match.
+inline bool operator==(const SymbolLocation::Position &L,
+                       const SymbolLocation::Position &R) {
+  return L.Line == R.Line && L.Column == R.Column;
+}
+// Positions are ordered by line first, then by column.
+inline bool operator<(const SymbolLocation::Position &L,
+                      const SymbolLocation::Position &R) {
+  return L.Line != R.Line ? L.Line < R.Line : L.Column < R.Column;
+}
+inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) {
+  return L.FileURI == R.FileURI && L.Start == R.Start && L.End == R.End;
+}
+// Locations are ordered by file URI, then start, then end position.
+inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) {
+  return std::tie(L.FileURI, L.Start, L.End) <
+         std::tie(R.FileURI, R.Start, R.End);
+}
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
 
 // The class identifies a particular C++ symbol (class, function, method, etc).
@@ -310,6 +322,7 @@
   return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(A) &
                                            static_cast<uint8_t>(B));
 }
+raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K);
 
 // Represents a symbol occurrence in the source file. It could be a
 // declaration/definition/reference occurrence.
@@ -320,6 +333,47 @@
   SymbolLocation Location;
   SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown;
 };
+inline bool operator<(const SymbolOccurrence &L, const SymbolOccurrence &R) {
+  return L.Location == R.Location ? L.Kind < R.Kind : L.Location < R.Location;
+}
+inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R) {
+  return L.Location == R.Location && L.Kind == R.Kind;
+}
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const SymbolOccurrence &Occurrence);
+
+// Stores the occurrences of many symbols, grouped by symbol ID.
+// The slab owns the file URI strings; identical URIs are stored only once.
+class SymbolOccurrenceSlab {
+public:
+  using const_iterator =
+      llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>>::const_iterator;
+  using iterator = const_iterator;
+
+  SymbolOccurrenceSlab() : UniqueStrings(Arena) {}
+
+  const_iterator begin() const { return Occurrences.begin(); }
+  const_iterator end() const { return Occurrences.end(); }
+
+  // Adds Occurrence to the entries of SymID. The occurrence is deep-copied:
+  // its FileURI is re-saved so that the string is owned by the slab.
+  void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence);
+  // Returns the occurrences stored for ID, or an empty list if there are none.
+  llvm::ArrayRef<SymbolOccurrence> find(const SymbolID &ID) const {
+    auto Found = Occurrences.find(ID);
+    if (Found != Occurrences.end())
+      return Found->second;
+    return {};
+  }
+  // Deduplicates the stored occurrences and disallows further insert() calls.
+  void freeze();
+
+private:
+  bool Frozen = false;
+  llvm::BumpPtrAllocator Arena;
+  llvm::UniqueStringSaver UniqueStrings;
+  llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences;
+};
 
 struct FuzzyFindRequest {
   /// \brief A query string for the fuzzy find. This is matched against symbols'
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -134,5 +134,44 @@
   return SymbolSlab(std::move(NewArena), std::move(Symbols));
 }
 
+// Prints the kind as the unsigned integer value of the enumerator.
+raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) {
+  return OS << static_cast<unsigned>(K);
+}
+
+// Prints an occurrence as "<location>:<kind>".
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const SymbolOccurrence &Occurrence) {
+  return OS << Occurrence.Location << ":" << Occurrence.Kind;
+}
+
+void SymbolOccurrenceSlab::insert(const SymbolID &SymID,
+                                  const SymbolOccurrence &Occurrence) {
+  assert(!Frozen &&
+         "Can't insert a symbol occurrence after the slab has been frozen!");
+  // Store a copy whose FileURI points into the slab's own string storage.
+  SymbolOccurrence Owned = Occurrence;
+  Owned.Location.FileURI = UniqueStrings.save(Occurrence.Location.FileURI);
+  Occurrences[SymID].push_back(Owned);
+}
+
+// Finalizes the slab: removes duplicate occurrences of every symbol and marks
+// the slab as frozen, after which insert() asserts.
+void SymbolOccurrenceSlab::freeze() {
+  for (auto &Entry : Occurrences) {
+    std::vector<SymbolOccurrence> &PerSymbol = Entry.getSecond();
+
+    // Both algorithms rely on SymbolOccurrence's operator< and operator==.
+    // Sorting places equal occurrences next to each other ...
+    std::sort(PerSymbol.begin(), PerSymbol.end());
+
+    // ... so that std::unique can move the repeats to the tail for erasure.
+    auto FirstRepeat = std::unique(PerSymbol.begin(), PerSymbol.end());
+    PerSymbol.erase(FirstRepeat, PerSymbol.end());
+  }
+
+  Frozen = true;
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -41,6 +41,7 @@
   index/MemIndex.cpp
   index/Merge.cpp
   index/SymbolCollector.cpp
+  index/SymbolOccurrenceCollector.cpp
   index/SymbolYAML.cpp
 
   index/dex/Iterator.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to