kbobyrev created this revision.
kbobyrev added reviewers: ioeric, ilya-biryukov.
Herald added subscribers: arphaman, jkorous, MaskRay.

https://reviews.llvm.org/D50517

Files:
  clang-tools-extra/clangd/index/dex/Trigram.cpp
  clang-tools-extra/clangd/index/dex/Trigram.h
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -250,30 +250,44 @@
 }
 
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
-  EXPECT_THAT(generateIdentifierTrigrams("X86"), trigramsAre({"x86"}));
+  EXPECT_THAT(generateIdentifierTrigrams("X86"),
+              trigramsAre({"x86", "x$$", "8$$", "6$$", "x8$", "86$"}));
 
-  EXPECT_THAT(generateIdentifierTrigrams("nl"), trigramsAre({}));
+  EXPECT_THAT(generateIdentifierTrigrams("nl"),
+              trigramsAre({"nl$", "n$$", "l$$"}));
 
-  EXPECT_THAT(generateIdentifierTrigrams("clangd"),
-              trigramsAre({"cla", "lan", "ang", "ngd"}));
-
-  EXPECT_THAT(generateIdentifierTrigrams("abc_def"),
-              trigramsAre({"abc", "abd", "ade", "bcd", "bde", "cde", "def"}));
+  EXPECT_THAT(generateIdentifierTrigrams("n"), trigramsAre({"n$$"}));
 
   EXPECT_THAT(
-      generateIdentifierTrigrams("a_b_c_d_e_"),
-      trigramsAre({"abc", "abd", "acd", "ace", "bcd", "bce", "bde", "cde"}));
+      generateIdentifierTrigrams("clangd"),
+      trigramsAre({"cla", "lan", "ang", "ngd", "an$", "n$$", "g$$", "cl$",
+                   "ng$", "d$$", "l$$", "a$$", "c$$", "gd$", "la$"}));
 
-  EXPECT_THAT(
-      generateIdentifierTrigrams("unique_ptr"),
-      trigramsAre({"uni", "unp", "upt", "niq", "nip", "npt", "iqu", "iqp",
-                   "ipt", "que", "qup", "qpt", "uep", "ept", "ptr"}));
+  EXPECT_THAT(generateIdentifierTrigrams("abc_def"),
+              trigramsAre({"abc", "abd", "ade", "bcd", "bde", "cde", "def",
+                           "a$$", "b$$", "c$$", "d$$", "e$$", "f$$", "ab$",
+                           "ad$", "bc$", "bd$", "cd$", "de$", "ef$"}));
+
+  EXPECT_THAT(generateIdentifierTrigrams("a_b_c_d_e_"),
+              trigramsAre({"abc", "abd", "acd", "ace", "bcd", "bce", "bde",
+                           "cde", "a$$", "ab$", "ac$", "b$$", "bc$", "bd$",
+                           "c$$", "cd$", "ce$", "d$$", "de$", "e$$"}));
+
+  EXPECT_THAT(generateIdentifierTrigrams("unique_ptr"),
+              trigramsAre({"uni", "unp", "upt", "niq", "nip", "npt", "iqu",
+                           "iqp", "ipt", "que", "qup", "qpt", "uep", "ept",
+                           "ptr", "u$$", "un$", "up$", "n$$", "ni$", "np$",
+                           "i$$", "iq$", "ip$", "q$$", "qu$", "qp$", "ue$",
+                           "e$$", "ep$", "p$$", "pt$", "t$$", "tr$", "r$$"}));
 
   EXPECT_THAT(generateIdentifierTrigrams("TUDecl"),
-              trigramsAre({"tud", "tde", "ude", "dec", "ecl"}));
+              trigramsAre({"tud", "tde", "ude", "dec", "ecl", "t$$", "tu$",
+                           "td$", "u$$", "ud$", "d$$", "de$", "e$$", "ec$",
+                           "c$$", "cl$", "l$$"}));
 
   EXPECT_THAT(generateIdentifierTrigrams("IsOK"),
-              trigramsAre({"iso", "iok", "sok"}));
+              trigramsAre({"iso", "iok", "sok", "i$$", "is$", "io$", "s$$",
+                           "so$", "o$$", "ok$", "k$$"}));
 
   EXPECT_THAT(generateIdentifierTrigrams("abc_defGhij__klm"),
               trigramsAre({
Index: clang-tools-extra/clangd/index/dex/Trigram.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Trigram.h
+++ clang-tools-extra/clangd/index/dex/Trigram.h
@@ -31,6 +31,11 @@
 namespace clangd {
 namespace dex {
 
+/// This is used to mark unigrams and bigrams and distinct them from complete
+/// trigrams. Since '$' is not present in valid identifier names, it is safe to
+/// use it as the special symbol.
+const auto END_SYMBOL = '$';
+
 /// Returns list of unique fuzzy-search trigrams from unqualified symbol.
 ///
 /// First, given Identifier (unqualified symbol name) is segmented using
Index: clang-tools-extra/clangd/index/dex/Trigram.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Trigram.cpp
+++ clang-tools-extra/clangd/index/dex/Trigram.cpp
@@ -10,11 +10,9 @@
 #include "Trigram.h"
 #include "../../FuzzyMatch.h"
 #include "Token.h"
-
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
-
 #include <cctype>
 #include <queue>
 #include <string>
@@ -59,15 +57,31 @@
     }
   }
 
+  // Iterate through valid seqneces of three characters Fuzzy Matcher can
+  // process.
   DenseSet<Token> UniqueTrigrams;
   std::array<char, 4> Chars;
   for (size_t I = 0; I < LowercaseIdentifier.size(); ++I) {
     // Skip delimiters.
     if (Roles[I] != Head && Roles[I] != Tail)
       continue;
+
+    Chars = {{LowercaseIdentifier[I], END_SYMBOL, END_SYMBOL, 0}};
+    const auto Unigram = Token(Token::Kind::Trigram, Chars.data());
+    if (!UniqueTrigrams.count(Unigram)) {
+      UniqueTrigrams.insert(Unigram);
+    }
+
     for (const unsigned J : Next[I]) {
       if (!J)
         continue;
+
+      Chars = {{LowercaseIdentifier[I], LowercaseIdentifier[J], END_SYMBOL, 0}};
+      const auto Bigram = Token(Token::Kind::Trigram, Chars.data());
+      if (!UniqueTrigrams.count(Bigram)) {
+        UniqueTrigrams.insert(Bigram);
+      }
+
       for (const unsigned K : Next[J]) {
         if (!K)
           continue;
@@ -99,7 +113,7 @@
   std::string LowercaseQuery = Query.lower();
 
   DenseSet<Token> UniqueTrigrams;
-  std::deque<char> Chars;
+  std::deque<char> Chars = {END_SYMBOL, END_SYMBOL, END_SYMBOL};
 
   for (size_t I = 0; I < LowercaseQuery.size(); ++I) {
     // If current symbol is delimiter, just skip it.
@@ -110,13 +124,17 @@
 
     if (Chars.size() > 3)
       Chars.pop_front();
-    if (Chars.size() == 3) {
-      auto Trigram =
-          Token(Token::Kind::Trigram, std::string(begin(Chars), end(Chars)));
-      // Push unique trigrams to the result.
-      if (!UniqueTrigrams.count(Trigram)) {
-        UniqueTrigrams.insert(Trigram);
-      }
+
+    auto Trigram =
+        Token(Token::Kind::Trigram, std::string(begin(Chars), end(Chars)));
+
+    if (Chars.front() == END_SYMBOL)
+      Trigram = Token(Token::Kind::Trigram,
+                      std::string(Chars.rbegin(), Chars.rend()));
+
+    // Push unique trigrams to the result.
+    if (!UniqueTrigrams.count(Trigram)) {
+      UniqueTrigrams.insert(Trigram);
     }
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to