kbobyrev updated this revision to Diff 159515.
kbobyrev added a comment.

Continue implementing Proof of Concept Dex-based static index replacement.

This diff adds short query processing, the current solution does not utilize 
iterators framework (unlike the general queries) yet and is a subject to 
change. As discussed offline, this implementation should lean towards 
simplicity and usability rather then premature optimization.

The patch is still not ready for a comprehensive review yet, these are few 
points which should be addressed before the review is live:

- Code duplication should be reduced as much as possible. `DexIndex` is likely 
to become way more sophisticated than `MemIndex` in the future and hence it 
does not simply inherit or reuse `MemIndex`, this is also a reason why (as 
discussed offline) code duplication in unit tests is not that bad keeping in 
mind that the functionality and implementation of both types of index will 
diverge in the future. However, it's better to abstract out as much as possible 
if the implementation does not become less flexible and cross-dependencies are 
not introduced in the process.
- Slightly cleaning up unit tests (`IndexHelpers.(h|cpp)` is not a very good 
name for the new file used by both `MemIndex` and `DexIndex` testing framework, 
code duplication is also a slight concern)


https://reviews.llvm.org/D50337

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/MemIndex.h
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/DexIndex.h
  clang-tools-extra/clangd/index/dex/Token.cpp
  clang-tools-extra/clangd/index/dex/Token.h
  clang-tools-extra/unittests/clangd/CMakeLists.txt
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.cpp
  clang-tools-extra/unittests/clangd/IndexHelpers.h
  clang-tools-extra/unittests/clangd/IndexTests.cpp

Index: clang-tools-extra/unittests/clangd/IndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/IndexTests.cpp
+++ clang-tools-extra/unittests/clangd/IndexTests.cpp
@@ -7,33 +7,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
 #include "index/Index.h"
 #include "index/MemIndex.h"
 #include "index/Merge.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-using testing::UnorderedElementsAre;
 using testing::Pointee;
+using testing::UnorderedElementsAre;
 
 namespace clang {
 namespace clangd {
 namespace {
 
-Symbol symbol(llvm::StringRef QName) {
-  Symbol Sym;
-  Sym.ID = SymbolID(QName.str());
-  size_t Pos = QName.rfind("::");
-  if (Pos == llvm::StringRef::npos) {
-    Sym.Name = QName;
-    Sym.Scope = "";
-  } else {
-    Sym.Name = QName.substr(Pos + 2);
-    Sym.Scope = QName.substr(0, Pos + 2);
-  }
-  return Sym;
-}
-
 MATCHER_P(Named, N, "") { return arg.Name == N; }
 
 TEST(SymbolSlab, FindAndIterate) {
@@ -52,59 +39,6 @@
     EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
 }
 
-struct SlabAndPointers {
-  SymbolSlab Slab;
-  std::vector<const Symbol *> Pointers;
-};
-
-// Create a slab of symbols with the given qualified names as both IDs and
-// names. The life time of the slab is managed by the returned shared pointer.
-// If \p WeakSymbols is provided, it will be pointed to the managed object in
-// the returned shared pointer.
-std::shared_ptr<std::vector<const Symbol *>>
-generateSymbols(std::vector<std::string> QualifiedNames,
-                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  SymbolSlab::Builder Slab;
-  for (llvm::StringRef QName : QualifiedNames)
-    Slab.insert(symbol(QName));
-
-  auto Storage = std::make_shared<SlabAndPointers>();
-  Storage->Slab = std::move(Slab).build();
-  for (const auto &Sym : Storage->Slab)
-    Storage->Pointers.push_back(&Sym);
-  if (WeakSymbols)
-    *WeakSymbols = Storage;
-  auto *Pointers = &Storage->Pointers;
-  return {std::move(Storage), Pointers};
-}
-
-// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
-// to the `generateSymbols` above.
-std::shared_ptr<std::vector<const Symbol *>>
-generateNumSymbols(int Begin, int End,
-                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
-  std::vector<std::string> Names;
-  for (int i = Begin; i <= End; i++)
-    Names.push_back(std::to_string(i));
-  return generateSymbols(Names, WeakSymbols);
-}
-
-std::string getQualifiedName(const Symbol &Sym) {
-  return (Sym.Scope + Sym.Name).str();
-}
-
-std::vector<std::string> match(const SymbolIndex &I,
-                               const FuzzyFindRequest &Req,
-                               bool *Incomplete = nullptr) {
-  std::vector<std::string> Matches;
-  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
-    Matches.push_back(getQualifiedName(Sym));
-  });
-  if (Incomplete)
-    *Incomplete = IsIncomplete;
-  return Matches;
-}
-
 TEST(MemIndexTest, MemIndexSymbolsRecycled) {
   MemIndex I;
   std::weak_ptr<SlabAndPointers> Symbols;
@@ -212,18 +146,6 @@
   EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
-// Returns qualified names of symbols with any of IDs in the index.
-std::vector<std::string> lookup(const SymbolIndex &I,
-                                llvm::ArrayRef<SymbolID> IDs) {
-  LookupRequest Req;
-  Req.IDs.insert(IDs.begin(), IDs.end());
-  std::vector<std::string> Results;
-  I.lookup(Req, [&](const Symbol &Sym) {
-    Results.push_back(getQualifiedName(Sym));
-  });
-  return Results;
-}
-
 TEST(MemIndexTest, Lookup) {
   MemIndex I;
   I.build(generateSymbols({"ns::abc", "ns::xyz"}));
@@ -269,7 +191,7 @@
 TEST(MergeTest, Merge) {
   Symbol L, R;
   L.ID = R.ID = SymbolID("hello");
-  L.Name = R.Name = "Foo";                    // same in both
+  L.Name = R.Name = "Foo";                           // same in both
   L.CanonicalDeclaration.FileURI = "file:///left.h"; // differs
   R.CanonicalDeclaration.FileURI = "file:///right.h";
   L.References = 1;
Index: clang-tools-extra/unittests/clangd/IndexHelpers.h
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.h
@@ -0,0 +1,57 @@
+//===-- IndexHelpers.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_INDEXTESTCOMMON_H
+
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName);
+
+struct SlabAndPointers {
+  SymbolSlab Slab;
+  std::vector<const Symbol *> Pointers;
+};
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr);
+
+std::string getQualifiedName(const Symbol &Sym);
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req,
+                               bool *Incomplete = nullptr);
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/unittests/clangd/IndexHelpers.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/unittests/clangd/IndexHelpers.cpp
@@ -0,0 +1,89 @@
+//===-- IndexHelpers.cpp ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexHelpers.h"
+
+namespace clang {
+namespace clangd {
+
+Symbol symbol(llvm::StringRef QName) {
+  Symbol Sym;
+  Sym.ID = SymbolID(QName.str());
+  size_t Pos = QName.rfind("::");
+  if (Pos == llvm::StringRef::npos) {
+    Sym.Name = QName;
+    Sym.Scope = "";
+  } else {
+    Sym.Name = QName.substr(Pos + 2);
+    Sym.Scope = QName.substr(0, Pos + 2);
+  }
+  return Sym;
+}
+
+// Create a slab of symbols with the given qualified names as both IDs and
+// names. The life time of the slab is managed by the returned shared pointer.
+// If \p WeakSymbols is provided, it will be pointed to the managed object in
+// the returned shared pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateSymbols(std::vector<std::string> QualifiedNames,
+                std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  SymbolSlab::Builder Slab;
+  for (llvm::StringRef QName : QualifiedNames)
+    Slab.insert(symbol(QName));
+
+  auto Storage = std::make_shared<SlabAndPointers>();
+  Storage->Slab = std::move(Slab).build();
+  for (const auto &Sym : Storage->Slab)
+    Storage->Pointers.push_back(&Sym);
+  if (WeakSymbols)
+    *WeakSymbols = Storage;
+  auto *Pointers = &Storage->Pointers;
+  return {std::move(Storage), Pointers};
+}
+
+// Create a slab of symbols with IDs and names [Begin, End], otherwise identical
+// to the `generateSymbols` above.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols) {
+  std::vector<std::string> Names;
+  for (int i = Begin; i <= End; i++)
+    Names.push_back(std::to_string(i));
+  return generateSymbols(Names, WeakSymbols);
+}
+
+std::string getQualifiedName(const Symbol &Sym) {
+  return (Sym.Scope + Sym.Name).str();
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req, bool *Incomplete) {
+  std::vector<std::string> Matches;
+  bool IsIncomplete = I.fuzzyFind(Req, [&](const Symbol &Sym) {
+    Matches.push_back(clang::clangd::getQualifiedName(Sym));
+  });
+  if (Incomplete)
+    *Incomplete = IsIncomplete;
+  return Matches;
+}
+
+// Returns qualified names of symbols with any of IDs in the index.
+std::vector<std::string> lookup(const SymbolIndex &I,
+                                llvm::ArrayRef<SymbolID> IDs) {
+  LookupRequest Req;
+  Req.IDs.insert(IDs.begin(), IDs.end());
+  std::vector<std::string> Results;
+  I.lookup(Req, [&](const Symbol &Sym) {
+    Results.push_back(getQualifiedName(Sym));
+  });
+  return Results;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -7,6 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IndexHelpers.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/DexIndex.h"
 #include "index/dex/Iterator.h"
 #include "index/dex/Token.h"
 #include "index/dex/Trigram.h"
@@ -17,11 +21,13 @@
 #include <string>
 #include <vector>
 
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+
 namespace clang {
 namespace clangd {
 namespace dex {
-
-using ::testing::ElementsAre;
+namespace {
 
 TEST(DexIndexIterators, DocumentIterator) {
   const PostingList L = {4, 7, 8, 20, 42, 100};
@@ -312,6 +318,218 @@
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexIndex, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Use 3+ symbols long names so that trigram index is used.
+TEST(MergeDexIndex, Lookup) {
+  DexIndex I, J;
+  I.build(generateSymbols({"ns::A", "ns::B"}));
+  J.build(generateSymbols({"ns::B", "ns::C"}));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")),
+              UnorderedElementsAre("ns::A"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")),
+              UnorderedElementsAre("ns::B"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")),
+              UnorderedElementsAre("ns::C"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}),
+      UnorderedElementsAre("ns::A", "ns::B"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}),
+      UnorderedElementsAre("ns::A", "ns::C"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")),
+              UnorderedElementsAre());
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre());
+}
+
+// FIXME(kbobyrev): Add more tests on DexIndex? Right now, it's mostly a wrapper
+// around DexIndex, adopting tests from IndexTests.cpp sounds reasonable.
+// However, most of these tests use short names (<3 symbols) for symbol lookups,
+// which currently are dispatched to DexIndex and hence just duplicating these
+// tests while substituting DexIndex with DexIndex is not a viable solution.
+TEST(DexIndex, FuzzyFindQ) {
+  DexIndex Index;
+  Index.build(generateSymbols(
+      {"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC", "other::ABC"}));
+  FuzzyFindRequest Req;
+  Req.Query = "ABC";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(Index, Req), UnorderedElementsAre("ns::ABC"));
+  Req.Scopes = {"ns::", "ns::nested::"};
+  EXPECT_THAT(match(Index, Req),
+              UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+}
+
+TEST(DexIndexTest, FuzzyMatchQ) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, DexIndexSymbolsRecycled) {
+  DexIndex I;
+  std::weak_ptr<SlabAndPointers> Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+
+  EXPECT_FALSE(Symbols.expired());
+  // Release old symbols.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+TEST(DexIndexTest, DexIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject some duplicates and make sure we only match the same symbol once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  DexIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST(DexIndexTest, DexIndexLimitedNumMatches) {
+  DexIndex I;
+  I.build(generateNumSymbols(0, 100));
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  Req.MaxCandidateCount = 3;
+  bool Incomplete;
+  auto Matches = match(I, Req, &Incomplete);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+  EXPECT_TRUE(Incomplete);
+}
+
+TEST(DexIndexTest, FuzzyMatch) {
+  DexIndex I;
+  I.build(
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+  FuzzyFindRequest Req;
+  Req.Query = "lol";
+  Req.MaxCandidateCount = 2;
+  EXPECT_THAT(match(I, Req),
+              UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {""};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2"));
+}
+
+TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::", "b::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
+}
+
+TEST(DexIndexTest, NoMatchNestedScopes) {
+  DexIndex I;
+  I.build(generateSymbols({"a::y1", "a::b::y2"}));
+  FuzzyFindRequest Req;
+  Req.Query = "y";
+  Req.Scopes = {"a::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1"));
+}
+
+TEST(DexIndexTest, IgnoreCases) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::ABC", "ns::abc"}));
+  FuzzyFindRequest Req;
+  Req.Query = "AB";
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
+}
+
+TEST(DexIndexTest, Lookup) {
+  DexIndex I;
+  I.build(generateSymbols({"ns::abc", "ns::xyz"}));
+  EXPECT_THAT(lookup(I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::abc", "ns::xyz"));
+  EXPECT_THAT(lookup(I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+              UnorderedElementsAre("ns::xyz"));
+  EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+TEST(DexMergeIndexTest, Lookup) {
+  DexIndex I, J;
+  I.build(generateSymbols({"ns::A", "ns::B"}));
+  J.build(generateSymbols({"ns::B", "ns::C"}));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::A")),
+              UnorderedElementsAre("ns::A"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::B")),
+              UnorderedElementsAre("ns::B"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::C")),
+              UnorderedElementsAre("ns::C"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::B")}),
+      UnorderedElementsAre("ns::A", "ns::B"));
+  EXPECT_THAT(
+      lookup(*mergeIndex(&I, &J), {SymbolID("ns::A"), SymbolID("ns::C")}),
+      UnorderedElementsAre("ns::A", "ns::C"));
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), SymbolID("ns::D")),
+              UnorderedElementsAre());
+  EXPECT_THAT(lookup(*mergeIndex(&I, &J), {}), UnorderedElementsAre());
+}
+
+TEST(DexMergeIndexTest, FuzzyFind) {
+  DexIndex I, J;
+  I.build(generateSymbols({"ns::A", "ns::B"}));
+  J.build(generateSymbols({"ns::B", "ns::C"}));
+  FuzzyFindRequest Req;
+  Req.Scopes = {"ns::"};
+  EXPECT_THAT(match(*mergeIndex(&I, &J), Req),
+              UnorderedElementsAre("ns::A", "ns::B", "ns::C"));
+}
+
+} // namespace
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/unittests/clangd/CMakeLists.txt
@@ -23,6 +23,7 @@
   FuzzyMatchTests.cpp
   GlobalCompilationDatabaseTests.cpp
   HeadersTests.cpp
+  IndexHelpers.cpp
   IndexTests.cpp
   QualityTests.cpp
   SourceCodeTests.cpp
Index: clang-tools-extra/clangd/index/dex/Token.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Token.h
+++ clang-tools-extra/clangd/index/dex/Token.h
@@ -22,9 +22,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DEX_TOKEN_H
 
+#include "../Index.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <string>
 #include <vector>
 
@@ -81,6 +81,8 @@
   }
 };
 
+std::vector<Token> generateSearchTokens(const Symbol &Sym);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/Token.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/Token.cpp
@@ -0,0 +1,25 @@
+//===--- Token.cpp - Symbol Search primitive --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+#include "Trigram.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+  std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+  Result.push_back(Token(Token::Kind::Scope, Sym.Scope));
+  return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.h
@@ -0,0 +1,57 @@
+//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
+
+#include "../Index.h"
+#include "../MemIndex.h"
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex index implementation.
+class DexIndex : public SymbolIndex {
+public:
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  // FIXME(kbobyrev): This is the same as in MemIndex. Should I abstract this
+  // out?
+  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
+
+  bool
+  fuzzyFind(const FuzzyFindRequest &Req,
+            llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  // FIXME(kbobyrev): Symbol lookup is also exactly the same as in MemIndex.
+  virtual void
+  lookup(const LookupRequest &Req,
+         llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override;
+
+private:
+  std::shared_ptr<std::vector<const Symbol *>> Symbols;
+  llvm::DenseMap<SymbolID, const Symbol *> Index;
+  mutable std::mutex Mutex;
+  llvm::DenseMap<Token, PostingList> InvertedIndex;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -0,0 +1,174 @@
+//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DexIndex.h"
+#include "../../FuzzyMatch.h"
+#include "../../Logger.h"
+#include <algorithm>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+void DexIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+  for (const Symbol *Sym : *Syms)
+    TempIndex[Sym->ID] = Sym;
+
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+
+    Index = std::move(TempIndex);
+    Symbols = std::move(Syms);
+
+    std::sort(begin(*Symbols), end(*Symbols),
+              [](const Symbol *LHS, const Symbol *RHS) {
+                return quality(*LHS) > quality(*RHS);
+              });
+    InvertedIndex.clear();
+    for (DocID SymbolRank = 0; SymbolRank < Symbols->size(); ++SymbolRank) {
+      const auto Sym = (*Symbols)[SymbolRank];
+      for (const auto &Token : generateSearchTokens(*Sym)) {
+        if (!InvertedIndex.count(Token))
+          InvertedIndex[Token] = {SymbolRank};
+        else
+          InvertedIndex[Token].push_back(SymbolRank);
+      }
+    }
+  }
+}
+
+bool DexIndex::fuzzyFind(
+    const FuzzyFindRequest &Req,
+    llvm::function_ref<void(const Symbol &)> Callback) const {
+  assert(!StringRef(Req.Query).contains("::") &&
+         "There must be no :: in query.");
+  FuzzyMatcher Filter(Req.Query);
+  bool More;
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+
+    std::vector<DocID> SymbolDocIDs;
+
+    // FIXME(kbobyrev): Implement short queries (<3 length) using posting
+    // lists and incomplete trigrams.
+    // FIXME(kbobyrev): Code sharing is not very pleasant here, is it better to
+    // dispatch long and short queries to internal helper functions?
+    if (Req.Query.size() >= 3) {
+      // Generate query trigrams and construct AND iterator over all query
+      // trigrams.
+      const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+      std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+      for (const auto &Trigram : TrigramTokens) {
+        const auto It = InvertedIndex.find(Trigram);
+        if (It != InvertedIndex.end())
+          TrigramIterators.push_back(create(It->second));
+      }
+
+      std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+      TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+      std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+      for (const auto &Scope : Req.Scopes) {
+        const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+        if (It != InvertedIndex.end())
+          ScopeIterators.push_back(create(It->second));
+      }
+      // Only add OR iterator for scopes if the request contains scopes,
+      // otherwise the iterator won't have any children and will be invalid.
+      if (ScopeIterators.size())
+        TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+      auto QueryIterator = createAnd(move(TopLevelChildren));
+      // FIXME(kbobyrev): Limit the total number of consumed symbols using
+      // Req.MaxCandidateCount. That would require implementing Limit iterator.
+      SymbolDocIDs = consume(*QueryIterator);
+      More = SymbolDocIDs.size() > Req.MaxCandidateCount;
+    } else {
+      // FIXME(kbobyrev): Don't score items twice.
+      for (const auto &Pair : Index) {
+        const auto Sym = Pair.second;
+        const auto Score = Filter.match(Sym->Name);
+        // Exact match against all possible scopes.
+        if (!Req.Scopes.empty() && !llvm::is_contained(Req.Scopes, Sym->Scope))
+          continue;
+        if (Req.RestrictForCodeCompletion && !Sym->IsIndexedForCodeCompletion)
+          continue;
+        if (Score.hasValue()) {
+          // If the requested number of results is already retrieved but there
+          // are other symbols which match the criteria, just stop processing
+          // items and let callee know there are more results.
+          if (SymbolDocIDs.size() == Req.MaxCandidateCount) {
+            More = true;
+            break;
+          }
+          const auto SymDocID =
+              std::find(Symbols->begin(), Symbols->end(), Sym) -
+              Symbols->begin();
+          SymbolDocIDs.push_back(SymDocID);
+        }
+      }
+    }
+
+    // FIXME(kbobyrev): Scoring all matched symbols and then processing
+    // MaxCandidateCount items is rather expensive, this should be replaced by
+    // two-stage filtering as proposed in Dex design document.
+    std::vector<std::pair<float, const Symbol *>> Scores(SymbolDocIDs.size());
+    for (size_t SymbolIdx = 0; SymbolIdx < SymbolDocIDs.size(); ++SymbolIdx) {
+      const auto Sym = (*Symbols)[SymbolDocIDs[SymbolIdx]];
+      const auto Score = Filter.match(Sym->Name);
+      assert(Score.hasValue() && "Matched symbol has all Fuzzy Matching "
+                                 "trigrams, it should match the query");
+      Scores[SymbolIdx] = std::make_pair(-*Score * quality(*Sym), Sym);
+    }
+    // Sort retrieved symbol by Fuzzy Matching score.
+    std::sort(begin(Scores), end(Scores));
+
+    for (size_t CandidateRank = 0;
+         CandidateRank < std::min(Req.MaxCandidateCount, Scores.size());
+         ++CandidateRank)
+      Callback(*Scores[CandidateRank].second);
+  }
+  return More;
+}
+
+void DexIndex::lookup(const LookupRequest &Req,
+                      llvm::function_ref<void(const Symbol &)> Callback) const {
+  for (const auto &ID : Req.IDs) {
+    auto I = Index.find(ID);
+    if (I != Index.end())
+      Callback(*I->second);
+  }
+}
+
+std::unique_ptr<SymbolIndex> DexIndex::build(SymbolSlab Slab) {
+  struct Snapshot {
+    SymbolSlab Slab;
+    std::vector<const Symbol *> Pointers;
+  };
+  auto Snap = std::make_shared<Snapshot>();
+  Snap->Slab = std::move(Slab);
+  for (auto &Sym : Snap->Slab)
+    Snap->Pointers.push_back(&Sym);
+  auto S = std::shared_ptr<std::vector<const Symbol *>>(std::move(Snap),
+                                                        &Snap->Pointers);
+  auto DexIdx = llvm::make_unique<DexIndex>();
+  DexIdx->build(std::move(S));
+  return std::move(DexIdx);
+}
+
+void DexIndex::findOccurrences(
+    const OccurrencesRequest &Req,
+    llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
+  log("findOccurrences is not implemented.");
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/MemIndex.h
===================================================================
--- clang-tools-extra/clangd/index/MemIndex.h
+++ clang-tools-extra/clangd/index/MemIndex.h
@@ -42,7 +42,6 @@
 private:
   std::shared_ptr<std::vector<const Symbol *>> Symbols;
   // Index is a set of symbols that are deduplicated by symbol IDs.
-  // FIXME: build smarter index structure.
   llvm::DenseMap<SymbolID, const Symbol *> Index;
   mutable std::mutex Mutex;
 };
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -43,8 +43,10 @@
   index/SymbolCollector.cpp
   index/SymbolYAML.cpp
 
+  index/dex/DexIndex.cpp
   index/dex/Iterator.cpp
   index/dex/Trigram.cpp
+  index/dex/Token.cpp
 
   LINK_LIBS
   clangAST
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to