Hello Sam, This commit broke one of our bots: http://lab.llvm.org:8011/builders/clang-x86_64-linux-abi-test/builds/18908
. . . FAILED: tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/FuzzyMatch.cpp.o /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Itools/clang/tools/extra/clangd -I/home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd -I/home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/include -Itools/clang/include -Iinclude -I/home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/include -fPIC -fvisibility-inlines-hidden -std=c++11 -Wall -W -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -ffunction-sections -fdata-sections -fno-common -Woverloaded-virtual -fno-strict-aliasing -O3 -UNDEBUG -fno-exceptions -fno-rtti -MD -MT tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/FuzzyMatch.cpp.o -MF tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/FuzzyMatch.cpp.o.d -o tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/FuzzyMatch.cpp.o -c /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:64:25: error: redeclaration ‘clang::clangd::FuzzyMatcher::MaxPat’ differs in ‘constexpr’ const int FuzzyMatcher::MaxPat; ^ In file included from /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:57:0: /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.h:45:24: error: from previous declaration ‘clang::clangd::FuzzyMatcher::MaxPat’ constexpr static int MaxPat = 63, MaxWord = 127; ^ /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:64:25: error: declaration of ‘constexpr const int clang::clangd::FuzzyMatcher::MaxPat’ outside of class is not definition [-fpermissive] const int FuzzyMatcher::MaxPat; ^ /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:65:25: error: redeclaration ‘clang::clangd::FuzzyMatcher::MaxWord’ differs in ‘constexpr’ const int FuzzyMatcher::MaxWord; ^ In file included from /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:57:0: /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.h:45:37: error: from previous declaration ‘clang::clangd::FuzzyMatcher::MaxWord’ constexpr static int MaxPat = 63, MaxWord = 127; ^ /home/buildslave/buildslave1a/clang-x86_64-linux-abi-test/llvm/tools/clang/tools/extra/clangd/FuzzyMatch.cpp:65:25: error: declaration of ‘constexpr const int clang::clangd::FuzzyMatcher::MaxWord’ outside of class is not definition [-fpermissive] const int FuzzyMatcher::MaxWord; ^ Please have a look? Thanks Galina On Fri, Dec 1, 2017 at 9:08 AM, Sam McCall via cfe-commits < cfe-commits@lists.llvm.org> wrote: > Author: sammccall > Date: Fri Dec 1 09:08:02 2017 > New Revision: 319557 > > URL: http://llvm.org/viewvc/llvm-project?rev=319557&view=rev > Log: > [clangd] Fuzzy match scorer > > Summary: > This will be used for rescoring code completion results based on partial > identifiers. > Short-term use: > - we want to limit the number of code completion results returned to > improve performance of global completion. The scorer will be used to > rerank the results to return when the user has applied a filter. > Long-term use case: > - ranking of completion results from in-memory index > - merging of completion results from multiple sources (merging usually > works best when done at the component-score level, rescoring the > fuzzy-match quality avoids different backends needing to have > comparable scores) > > Reviewers: ilya-biryukov > > Subscribers: cfe-commits, mgorny > > Differential Revision: https://reviews.llvm.org/D40060 > > Added: > clang-tools-extra/trunk/clangd/FuzzyMatch.cpp > clang-tools-extra/trunk/clangd/FuzzyMatch.h > clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp > Modified: > clang-tools-extra/trunk/clangd/CMakeLists.txt > clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt > > Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt > URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/ > trunk/clangd/CMakeLists.txt?rev=319557&r1=319556&r2=319557&view=diff > ============================================================ > ================== > --- clang-tools-extra/trunk/clangd/CMakeLists.txt (original) > +++ clang-tools-extra/trunk/clangd/CMakeLists.txt Fri Dec 1 09:08:02 2017 > @@ -8,6 +8,7 @@ add_clang_library(clangDaemon > ClangdUnit.cpp > ClangdUnitStore.cpp > DraftStore.cpp > + FuzzyMatch.cpp > GlobalCompilationDatabase.cpp > JSONExpr.cpp > JSONRPCDispatcher.cpp > > Added: clang-tools-extra/trunk/clangd/FuzzyMatch.cpp > URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/ > trunk/clangd/FuzzyMatch.cpp?rev=319557&view=auto > ============================================================ > ================== > --- clang-tools-extra/trunk/clangd/FuzzyMatch.cpp (added) > +++ clang-tools-extra/trunk/clangd/FuzzyMatch.cpp Fri Dec 1 09:08:02 2017 > @@ -0,0 +1,373 @@ > +//===--- FuzzyMatch.h - Approximate identifier matching ---------*- > C++-*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===------------------------------------------------------ > ----------------===// > +// > +// To check for a match between a Pattern ('u_p') and a Word > ('unique_ptr'), > +// we consider the possible partial match states: > +// > +// u n i q u e _ p t r > +// +--------------------- > +// |A . . . . . . . . . . > +// u| > +// |. . . . . . . . . . . > +// _| > +// |. . . . . . . O . . . > +// p| > +// |. . . . . . . . . . B > +// > +// Each dot represents some prefix of the pattern being matched against > some > +// prefix of the word. > +// - A is the initial state: '' matched against '' > +// - O is an intermediate state: 'u_' matched against 'unique_' > +// - B is the target state: 'u_p' matched against 'unique_ptr' > +// > +// We aim to find the best path from A->B. > +// - Moving right (consuming a word character) > +// Always legal: not all word characters must match. > +// - Moving diagonally (consuming both a word and pattern character) > +// Legal if the characters match. > +// - Moving down (consuming a pattern character) is never legal. > +// Never legal: all pattern characters must match something. > +// > +// The scoring is based on heuristics: > +// - when matching a character, apply a bonus or penalty depending on the > +// match quality (does case match, do word segments align, etc) > +// - when skipping a character, apply a penalty if it hurts the match > +// (it starts a word segment, or splits the matched region, etc) > +// > +// These heuristics require the ability to "look backward" one character, > to > +// see whether it was matched or not. Therefore the dynamic-programming > matrix > +// has an extra dimension (last character matched). > +// Each entry also has an additional flag indicating whether the > last-but-one > +// character matched, which is needed to trace back through the scoring > table > +// and reconstruct the match. > +// > +// We treat strings as byte-sequences, so only ASCII has first-class > support. > +// > +// This algorithm was inspired by VS code's client-side filtering, and > aims > +// to be mostly-compatible. > +// > +//===------------------------------------------------------ > ----------------===// > + > +#include "FuzzyMatch.h" > +#include "llvm/ADT/Optional.h" > +#include "llvm/Support/Format.h" > + > +using namespace llvm; > +using namespace clang::clangd; > + > +const int FuzzyMatcher::MaxPat; > +const int FuzzyMatcher::MaxWord; > + > +static char lower(char C) { return C >= 'A' && C <= 'Z' ? C + ('a' - 'A') > : C; } > +// A "negative infinity" score that won't overflow. > +// We use this to mark unreachable states and forbidden solutions. > +// Score field is 15 bits wide, min value is -2^14, we use half of that. > +static constexpr int AwfulScore = -(1 << 13); > +static bool isAwful(int S) { return S < AwfulScore / 2; } > +static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. > + > +FuzzyMatcher::FuzzyMatcher(StringRef Pattern) > + : PatN(std::min<int>(MaxPat, Pattern.size())), CaseSensitive(false), > + ScoreScale(float{1} / (PerfectBonus * PatN)), WordN(0) { > + memcpy(Pat, Pattern.data(), PatN); > + for (int I = 0; I < PatN; ++I) { > + LowPat[I] = lower(Pat[I]); > + CaseSensitive |= LowPat[I] != Pat[I]; > + } > + Scores[0][0][Miss] = {0, Miss}; > + Scores[0][0][Match] = {AwfulScore, Miss}; > + for (int P = 0; P <= PatN; ++P) > + for (int W = 0; W < P; ++W) > + for (Action A : {Miss, Match}) > + Scores[P][W][A] = {AwfulScore, Miss}; > + calculateRoles(Pat, PatRole, PatN); > +} > + > +Optional<float> FuzzyMatcher::match(StringRef Word) { > + if (!PatN) > + return 1; > + if (!(WordContainsPattern = init(Word))) > + return None; > + buildGraph(); > + auto Best = std::max(Scores[PatN][WordN][Miss].Score, > + Scores[PatN][WordN][Match].Score); > + if (isAwful(Best)) > + return None; > + return ScoreScale * std::min(PerfectBonus * PatN, std::max<int>(0, > Best)); > +} > + > +// Segmentation of words and patterns. > +// A name like "fooBar_baz" consists of several parts foo, bar, baz. > +// Aligning segmentation of word and pattern improves the fuzzy-match. > +// For example: [lol] matches "LaughingOutLoud" better than > "LionPopulation" > +// > +// First we classify each character into types (uppercase, lowercase, > etc). > +// Then we look at the sequence: e.g. [upper, lower] is the start of a > segment. > + > +// We only distinguish the types of characters that affect segmentation. > +// It's not obvious how to segment digits, we treat them as lowercase > letters. > +// As we don't decode UTF-8, we treat bytes over 127 as lowercase too. > +// This means we require exact (case-sensitive) match. > +enum FuzzyMatcher::CharType : char { > + Empty = 0, // Before-the-start and after-the-end (and control > chars). > + Lower = 1, // Lowercase letters, digits, and non-ASCII bytes. > + Upper = 2, // Uppercase letters. > + Punctuation = 3, // ASCII punctuation (including Space) > +}; > + > +// We get CharTypes from a lookup table. Each is 2 bits, 4 fit in each > byte. > +// The top 6 bits of the char select the byte, the bottom 2 select the > offset. > +// e.g. 'q' = 010100 01 = byte 28 (55), bits 3-2 (01) -> Lower. > +constexpr static uint8_t CharTypes[] = { > + 0x00, 0x00, 0x00, 0x00, // Control characters > + 0x00, 0x00, 0x00, 0x00, // Control characters > + 0xff, 0xff, 0xff, 0xff, // Punctuation > + 0x55, 0x55, 0xf5, 0xff, // Numbers->Lower, more Punctuation. > + 0xab, 0xaa, 0xaa, 0xaa, // @ and A-O > + 0xaa, 0xaa, 0xea, 0xff, // P-Z, more Punctuation. > + 0x57, 0x55, 0x55, 0x55, // ` and a-o > + 0x55, 0x55, 0xd5, 0x3f, // p-z, Punctuation, DEL. > + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // Bytes over 127 -> > Lower. > + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // (probably UTF-8). > + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, > + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, > +}; > + > +// Each character's Role is the Head or Tail of a segment, or a Separator. > +// e.g. XMLHttpRequest_Async > +// +--+---+------ +---- > +// ^Head ^Tail ^Separator > +enum FuzzyMatcher::CharRole : char { > + Unknown = 0, // Stray control characters or impossible states. > + Tail = 1, // Part of a word segment, but not the first character. > + Head = 2, // The first character of a word segment. > + Separator = 3, // Punctuation characters that separate word segments. > +}; > + > +// The Role can be determined from the Type of a character and its > neighbors: > +// > +// Example | Chars | Type | Role > +// ---------+--------------+----- > +// F(o)oBar | Foo | Ull | Tail > +// Foo(B)ar | oBa | lUl | Head > +// (f)oo | ^fo | Ell | Head > +// H(T)TP | HTT | UUU | Tail > +// > +// Our lookup table maps a 6 bit key (Prev, Curr, Next) to a 2-bit Role. > +// A byte packs 4 Roles. (Prev, Curr) selects a byte, Next selects the > offset. > +// e.g. Lower, Upper, Lower -> 01 10 01 -> byte 6 (aa), bits 3-2 (10) -> > Head. > +constexpr static uint8_t CharRoles[] = { > + // clang-format off > + // Curr= Empty Lower Upper Separ > + /* Prev=Empty */ 0x00, 0xaa, 0xaa, 0xff, // At start, > Lower|Upper->Head > + /* Prev=Lower */ 0x00, 0x55, 0xaa, 0xff, // In word, > Upper->Head;Lower->Tail > + /* Prev=Upper */ 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail > + /* Prev=Separ */ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at > start > + // clang-format on > +}; > + > +template <typename T> static T packedLookup(const uint8_t *Data, int I) { > + return static_cast<T>((Data[I >> 2] >> ((I & 3) * 2)) & 3); > +} > +void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int > N) { > + // Types holds a sliding window of (Prev, Curr, Next) types. > + // Initial value is (Empty, Empty, type of Text[0]). > + int Types = packedLookup<CharType>(CharTypes, Text[0]); > + // Rotate slides in the type of the next character. > + auto Rotate = [&](CharType T) { Types = ((Types << 2) | T) & 0x3f; }; > + for (int I = 0; I < N - 1; ++I) { > + // For each character, rotate in the next, and look up the role. > + Rotate(packedLookup<CharType>(CharTypes, Text[I + 1])); > + *Out++ = packedLookup<CharRole>(CharRoles, Types); > + } > + // For the last character, the "next character" is Empty. > + Rotate(Empty); > + *Out++ = packedLookup<CharRole>(CharRoles, Types); > +} > + > +// Sets up the data structures matching Word. > +// Returns false if we can cheaply determine that no match is possible. > +bool FuzzyMatcher::init(StringRef NewWord) { > + WordN = std::min<int>(MaxWord, NewWord.size()); > + if (PatN > WordN) > + return false; > + memcpy(Word, NewWord.data(), WordN); > + for (int I = 0; I < WordN; ++I) > + LowWord[I] = lower(Word[I]); > + > + // Cheap subsequence check. > + for (int W = 0, P = 0; P != PatN; ++W) { > + if (W == WordN) > + return false; > + if (LowWord[W] == LowPat[P]) > + ++P; > + } > + > + calculateRoles(Word, WordRole, WordN); > + return true; > +} > + > +// The forwards pass finds the mappings of Pattern onto Word. > +// Score = best score achieved matching Word[..W] against Pat[..P]. > +// Unlike other tables, indices range from 0 to N *inclusive* > +// Matched = whether we chose to match Word[W] with Pat[P] or not. > +// > +// Points are mostly assigned to matched characters, with 1 being a good > score > +// and 3 being a great one. So we treat the score range as [0, 3 * PatN]. > +// This range is not strict: we can apply larger bonuses/penalties, or > penalize > +// non-matched characters. > +void FuzzyMatcher::buildGraph() { > + for (int W = 0; W < WordN; ++W) { > + Scores[0][W + 1][Miss] = {Scores[0][W][Miss].Score - skipPenalty(W, > Miss), > + Miss}; > + Scores[0][W + 1][Match] = {AwfulScore, Miss}; > + } > + for (int P = 0; P < PatN; ++P) { > + for (int W = P; W < WordN; ++W) { > + auto &Score = Scores[P + 1][W + 1], &PreMiss = Scores[P + 1][W]; > + > + auto MatchMissScore = PreMiss[Match].Score; > + auto MissMissScore = PreMiss[Miss].Score; > + if (P < PatN - 1) { // Skipping trailing characters is always free. > + MatchMissScore -= skipPenalty(W, Match); > + MissMissScore -= skipPenalty(W, Miss); > + } > + Score[Miss] = (MatchMissScore > MissMissScore) > + ? ScoreInfo{MatchMissScore, Match} > + : ScoreInfo{MissMissScore, Miss}; > + > + if (LowPat[P] != LowWord[W]) { // No match possible. > + Score[Match] = {AwfulScore, Miss}; > + } else { > + auto &PreMatch = Scores[P][W]; > + auto MatchMatchScore = PreMatch[Match].Score + matchBonus(P, W, > Match); > + auto MissMatchScore = PreMatch[Miss].Score + matchBonus(P, W, > Miss); > + Score[Match] = (MatchMatchScore > MissMatchScore) > + ? ScoreInfo{MatchMatchScore, Match} > + : ScoreInfo{MissMatchScore, Miss}; > + } > + } > + } > +} > + > +int FuzzyMatcher::skipPenalty(int W, Action Last) { > + int S = 0; > + if (WordRole[W] == Head) // Skipping a segment. > + S += 1; > + if (Last == Match) // Non-consecutive match. > + S += 2; // We'd rather skip a segment than split our match. > + return S; > +} > + > +int FuzzyMatcher::matchBonus(int P, int W, Action Last) { > + assert(LowPat[P] == LowWord[W]); > + int S = 1; > + // Bonus: pattern so far is a (case-insensitive) prefix of the word. > + if (P == W) // We can't skip pattern characters, so we must have > matched all. > + ++S; > + // Bonus: case matches, or a Head in the pattern aligns with one in the > word. > + if ((Pat[P] == Word[W] && (CaseSensitive || P == W)) || > + (PatRole[P] == Head && WordRole[W] == Head)) > + ++S; > + // Penalty: matching inside a segment (and previous char wasn't > matched). > + if (WordRole[W] == Tail && P && Last == Miss) > + S -= 3; > + // Penalty: a Head in the pattern matches in the middle of a word > segment. > + if (PatRole[P] == Head && WordRole[W] == Tail) > + --S; > + // Penalty: matching the first pattern character in the middle of a > segment. > + if (P == 0 && WordRole[W] == Tail) > + S -= 4; > + assert(S <= PerfectBonus); > + return S; > +} > + > +llvm::SmallString<256> FuzzyMatcher::dumpLast(llvm::raw_ostream &OS) > const { > + llvm::SmallString<256> Result; > + OS << "=== Match \"" << StringRef(Word, WordN) << "\" against [" > + << StringRef(Pat, PatN) << "] ===\n"; > + if (!WordContainsPattern) { > + OS << "Substring check failed.\n"; > + return Result; > + } else if (isAwful(std::max(Scores[PatN][WordN][Match].Score, > + Scores[PatN][WordN][Miss].Score))) { > + OS << "Substring check passed, but all matches are forbidden\n"; > + } > + if (!CaseSensitive) > + OS << "Lowercase query, so scoring ignores case\n"; > + > + // Traverse Matched table backwards to reconstruct the Pattern/Word > mapping. > + // The Score table has cumulative scores, subtracting along this path > gives > + // us the per-letter scores. > + Action Last = > + (Scores[PatN][WordN][Match].Score > Scores[PatN][WordN][Miss]. > Score) > + ? Match > + : Miss; > + int S[MaxWord]; > + Action A[MaxWord]; > + for (int W = WordN - 1, P = PatN - 1; W >= 0; --W) { > + A[W] = Last; > + const auto &Cell = Scores[P + 1][W + 1][Last]; > + if (Last == Match) > + --P; > + const auto &Prev = Scores[P + 1][W][Cell.Prev]; > + S[W] = Cell.Score - Prev.Score; > + Last = Cell.Prev; > + } > + for (int I = 0; I < WordN; ++I) { > + if (A[I] == Match && (I == 0 || A[I - 1] == Miss)) > + Result.push_back('['); > + if (A[I] == Miss && I > 0 && A[I - 1] == Match) > + Result.push_back(']'); > + Result.push_back(Word[I]); > + } > + if (A[WordN - 1] == Match) > + Result.push_back(']'); > + > + for (char C : StringRef(Word, WordN)) > + OS << " " << C << " "; > + OS << "\n"; > + for (int I = 0, J = 0; I < WordN; I++) > + OS << " " << (A[I] == Match ? Pat[J++] : ' ') << " "; > + OS << "\n"; > + for (int I = 0; I < WordN; I++) > + OS << format("%2d ", S[I]); > + OS << "\n"; > + > + OS << "\nSegmentation:"; > + OS << "\n'" << StringRef(Word, WordN) << "'\n "; > + for (int I = 0; I < WordN; ++I) > + OS << "?-+ "[static_cast<int>(WordRole[I])]; > + OS << "\n[" << StringRef(Pat, PatN) << "]\n "; > + for (int I = 0; I < PatN; ++I) > + OS << "?-+ "[static_cast<int>(PatRole[I])]; > + OS << "\n"; > + > + OS << "\nScoring table (last-Miss, last-Match):\n"; > + OS << " | "; > + for (char C : StringRef(Word, WordN)) > + OS << " " << C << " "; > + OS << "\n"; > + OS << "-+----" << std::string(WordN * 4, '-') << "\n"; > + for (int I = 0; I <= PatN; ++I) { > + for (Action A : {Miss, Match}) { > + OS << ((I && A == Miss) ? Pat[I - 1] : ' ') << "|"; > + for (int J = 0; J <= WordN; ++J) { > + if (!isAwful(Scores[I][J][A].Score)) > + OS << format("%3d%c", Scores[I][J][A].Score, > + Scores[I][J][A].Prev == Match ? '*' : ' '); > + else > + OS << " "; > + } > + OS << "\n"; > + } > + } > + > + return Result; > +} > > Added: clang-tools-extra/trunk/clangd/FuzzyMatch.h > URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/ > trunk/clangd/FuzzyMatch.h?rev=319557&view=auto > ============================================================ > ================== > --- clang-tools-extra/trunk/clangd/FuzzyMatch.h (added) > +++ clang-tools-extra/trunk/clangd/FuzzyMatch.h Fri Dec 1 09:08:02 2017 > @@ -0,0 +1,84 @@ > +//===--- FuzzyMatch.h - Approximate identifier matching ---------*- > C++-*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===------------------------------------------------------ > ----------------===// > +// > +// This file implements fuzzy-matching of strings against identifiers. > +// It indicates both the existence and quality of a match: > +// 'eb' matches both 'emplace_back' and 'embed', the former has a better > score. > +// > +//===------------------------------------------------------ > ----------------===// > + > +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H > +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H > + > +#include "llvm/ADT/Optional.h" > +#include "llvm/ADT/SmallString.h" > +#include "llvm/ADT/StringRef.h" > +#include "llvm/Support/raw_ostream.h" > + > +namespace clang { > +namespace clangd { > + > +// A matcher capable of matching and scoring strings against a single > pattern. > +// It's optimized for matching against many strings - match() does not > allocate. > +class FuzzyMatcher { > +public: > + // Characters beyond MaxPat are ignored. > + FuzzyMatcher(llvm::StringRef Pattern); > + > + // If Word matches the pattern, return a score in [0,1] (higher is > better). > + // Characters beyond MaxWord are ignored. > + llvm::Optional<float> match(llvm::StringRef Word); > + > + // Dump internal state from the last match() to the stream, for > debugging. > + // Returns the pattern with [] around matched characters, e.g. > + // [u_p] + "unique_ptr" --> "[u]nique[_p]tr" > + llvm::SmallString<256> dumpLast(llvm::raw_ostream &) const; > + > +private: > + // We truncate the pattern and the word to bound the cost of matching. > + constexpr static int MaxPat = 63, MaxWord = 127; > + enum CharRole : char; // For segmentation. > + enum CharType : char; // For segmentation. > + enum Action { Miss = 0, Match = 1 }; > + > + bool init(llvm::StringRef Word); > + void buildGraph(); > + void calculateRoles(const char *Text, CharRole *Out, int N); > + int skipPenalty(int W, Action Last); > + int matchBonus(int P, int W, Action Last); > + > + // Pattern data is initialized by the constructor, then constant. > + char Pat[MaxPat]; // Pattern data > + int PatN; // Length > + char LowPat[MaxPat]; // Pattern in lowercase > + CharRole PatRole[MaxPat]; // Pattern segmentation info > + bool CaseSensitive; // Case-sensitive match if pattern has > uppercase > + float ScoreScale; // Normalizes scores for the pattern length. > + > + // Word data is initialized on each call to match(), mostly by init(). > + char Word[MaxWord]; // Word data > + int WordN; // Length > + char LowWord[MaxWord]; // Word in lowercase > + CharRole WordRole[MaxWord]; // Word segmentation info > + bool WordContainsPattern; // Simple substring check > + > + // Cumulative best-match score table. > + // Boundary conditions are filled in by the constructor. > + // The rest is repopulated for each match(), by buildGraph(). > + struct ScoreInfo { > + signed int Score : 15; > + Action Prev : 1; > + }; > + ScoreInfo Scores[MaxPat + 1][MaxWord + 1][/* Last Action */ 2]; > +}; > + > +} // namespace clangd > +} // namespace clang > + > +#endif > > Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt > URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/ > trunk/unittests/clangd/CMakeLists.txt?rev=319557&r1= > 319556&r2=319557&view=diff > ============================================================ > ================== > --- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original) > +++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Fri Dec 1 > 09:08:02 2017 > @@ -10,6 +10,7 @@ include_directories( > > add_extra_unittest(ClangdTests > ClangdTests.cpp > + FuzzyMatchTests.cpp > JSONExprTests.cpp > TraceTests.cpp > ) > > Added: clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp > URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/ > trunk/unittests/clangd/FuzzyMatchTests.cpp?rev=319557&view=auto > ============================================================ > ================== > --- clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp (added) > +++ clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp Fri Dec > 1 09:08:02 2017 > @@ -0,0 +1,252 @@ > +//===-- FuzzyMatchTests.cpp - String fuzzy matcher tests --------*- C++ > -*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===------------------------------------------------------ > ----------------===// > + > +#include "FuzzyMatch.h" > + > +#include "llvm/ADT/StringExtras.h" > +#include "gmock/gmock.h" > +#include "gtest/gtest.h" > + > +namespace clang { > +namespace clangd { > +namespace { > +using namespace llvm; > +using testing::Not; > + > +struct ExpectedMatch { > + ExpectedMatch(StringRef Annotated) : Word(Annotated), > Annotated(Annotated) { > + for (char C : "[]") > + Word.erase(std::remove(Word.begin(), Word.end(), C), Word.end()); > + } > + std::string Word; > + StringRef Annotated; > +}; > +raw_ostream &operator<<(raw_ostream &OS, const ExpectedMatch &M) { > + return OS << "'" << M.Word << "' as " << M.Annotated; > +} > + > +struct MatchesMatcher : public testing::MatcherInterface<StringRef> { > + ExpectedMatch Candidate; > + MatchesMatcher(ExpectedMatch Candidate) : Candidate(std::move(Candidate)) > {} > + > + void DescribeTo(::std::ostream *OS) const override { > + raw_os_ostream(*OS) << "Matches " << Candidate; > + } > + > + bool MatchAndExplain(StringRef Pattern, > + testing::MatchResultListener *L) const override { > + std::unique_ptr<raw_ostream> OS( > + L->stream() ? (raw_ostream *)(new raw_os_ostream(*L->stream())) > + : new raw_null_ostream()); > + FuzzyMatcher Matcher(Pattern); > + auto Result = Matcher.match(Candidate.Word); > + auto AnnotatedMatch = Matcher.dumpLast(*OS << "\n"); > + return Result && AnnotatedMatch == Candidate.Annotated; > + } > +}; > + > +// Accepts patterns that match a given word. > +// Dumps the debug tables on match failure. > +testing::Matcher<StringRef> matches(StringRef M) { > + return testing::MakeMatcher<StringRef>(new MatchesMatcher(M)); > +} > + > +TEST(FuzzyMatch, Matches) { > + EXPECT_THAT("u_p", matches("[u]nique[_p]tr")); > + EXPECT_THAT("up", matches("[u]nique_[p]tr")); > + EXPECT_THAT("uq", matches("[u]ni[q]ue_ptr")); > + EXPECT_THAT("qp", Not(matches("unique_ptr"))); > + EXPECT_THAT("log", Not(matches("SVGFEMorphologyElement"))); > + > + EXPECT_THAT("tit", matches("win.[tit]")); > + EXPECT_THAT("title", matches("win.[title]")); > + EXPECT_THAT("WordCla", matches("[Word]Character[Cla]ssifier")); > + EXPECT_THAT("WordCCla", matches("[WordC]haracter[Cla]ssifier")); > + > + EXPECT_THAT("dete", Not(matches("editor.quickSuggestionsDelay"))); > + > + EXPECT_THAT("highlight", matches("editorHover[Highlight]")); > + EXPECT_THAT("hhighlight", matches("editor[H]over[Highlight]")); > + EXPECT_THAT("dhhighlight", Not(matches("editorHoverHighlight"))); > + > + EXPECT_THAT("-moz", matches("[-moz]-foo")); > + EXPECT_THAT("moz", matches("-[moz]-foo")); > + EXPECT_THAT("moza", matches("-[moz]-[a]nimation")); > + > + EXPECT_THAT("ab", matches("[ab]A")); > + EXPECT_THAT("ccm", matches("[c]a[cm]elCase")); > + EXPECT_THAT("bti", Not(matches("the_black_knight"))); > + EXPECT_THAT("ccm", Not(matches("camelCase"))); > + EXPECT_THAT("cmcm", Not(matches("camelCase"))); > + EXPECT_THAT("BK", matches("the_[b]lack_[k]night")); > + EXPECT_THAT("KeyboardLayout=", Not(matches("KeyboardLayout"))); > + EXPECT_THAT("LLL", matches("SVisual[L]ogger[L]ogs[L]ist")); > + EXPECT_THAT("LLLL", Not(matches("SVilLoLosLi"))); > + EXPECT_THAT("LLLL", Not(matches("SVisualLoggerLogsList"))); > + EXPECT_THAT("TEdit", matches("[T]ext[Edit]")); > + EXPECT_THAT("TEdit", matches("[T]ext[Edit]or")); > + EXPECT_THAT("TEdit", matches("[Te]xte[dit]")); > + EXPECT_THAT("TEdit", matches("[t]ext_[edit]")); > + EXPECT_THAT("TEditDit", matches("[T]ext[Edit]or[D]ecorat[i]on[T]ype")); > + EXPECT_THAT("TEdit", matches("[T]ext[Edit]orDecorationType")); > + EXPECT_THAT("Tedit", matches("[T]ext[Edit]")); > + EXPECT_THAT("ba", Not(matches("?AB?"))); > + EXPECT_THAT("bkn", matches("the_[b]lack_[kn]ight")); > + EXPECT_THAT("bt", matches("the_[b]lack_knigh[t]")); > + EXPECT_THAT("ccm", matches("[c]amelCase[cm]")); > + EXPECT_THAT("fdm", matches("[f]in[dM]odel")); > + EXPECT_THAT("fob", matches("[fo]o[b]ar")); > + EXPECT_THAT("fobz", Not(matches("foobar"))); > + EXPECT_THAT("foobar", matches("[foobar]")); > + EXPECT_THAT("form", matches("editor.[form]atOnSave")); > + EXPECT_THAT("g p", matches("[G]it:[ P]ull")); > + EXPECT_THAT("g p", matches("[G]it:[ P]ull")); > + EXPECT_THAT("gip", matches("[Gi]t: [P]ull")); > + EXPECT_THAT("gip", matches("[Gi]t: [P]ull")); > + EXPECT_THAT("gp", matches("[G]it: [P]ull")); > + EXPECT_THAT("gp", matches("[G]it_Git_[P]ull")); > + EXPECT_THAT("is", matches("[I]mport[S]tatement")); > + EXPECT_THAT("is", matches("[is]Valid")); > + EXPECT_THAT("lowrd", matches("[low]Wo[rd]")); > + EXPECT_THAT("myvable", matches("[myva]ria[ble]")); > + EXPECT_THAT("no", Not(matches(""))); > + EXPECT_THAT("no", Not(matches("match"))); > + EXPECT_THAT("ob", Not(matches("foobar"))); > + EXPECT_THAT("sl", matches("[S]Visual[L]oggerLogsList")); > + EXPECT_THAT("sllll", matches("[S]Visua[lL]ogger[L]ogs[L]ist")); > + EXPECT_THAT("Three", matches("H[T]ML[HRE]l[e]ment")); > + EXPECT_THAT("Three", matches("[Three]")); > + EXPECT_THAT("fo", Not(matches("barfoo"))); > + EXPECT_THAT("fo", matches("bar_[fo]o")); > + EXPECT_THAT("fo", matches("bar_[Fo]o")); > + EXPECT_THAT("fo", matches("bar [fo]o")); > + EXPECT_THAT("fo", matches("bar.[fo]o")); > + EXPECT_THAT("fo", matches("bar/[fo]o")); > + EXPECT_THAT("fo", matches("bar\\[fo]o")); > + > + EXPECT_THAT( > + "aaaaaa", > + matches("[aaaaaa]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa > aaaaaaaaaaaaaaaaaaaaaaaaa" > + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); > + EXPECT_THAT("baba", Not(matches("ababababab"))); > + EXPECT_THAT("fsfsfs", Not(matches("dsafdsafdsafdsafdsafdsafdsafas > dfdsa"))); > + EXPECT_THAT("fsfsfsfsfsfsfsf", > + Not(matches("dsafdsafdsafdsafdsafdsafdsafas > dfdsafdsafdsafdsafdsfd" > + "safdsfdfdfasdnfdsajfndsjnafjndsajlknfdsa"))); > + > + EXPECT_THAT(" g", matches("[ g]roup")); > + EXPECT_THAT("g", matches(" [g]roup")); > + EXPECT_THAT("g g", Not(matches(" groupGroup"))); > + EXPECT_THAT("g g", matches(" [g]roup[ G]roup")); > + EXPECT_THAT(" g g", matches("[ ] [g]roup[ G]roup")); > + EXPECT_THAT("zz", matches("[zz]Group")); > + EXPECT_THAT("zzg", matches("[zzG]roup")); > + EXPECT_THAT("g", matches("zz[G]roup")); > + > + EXPECT_THAT("aaaa", matches("_a_[aaaa]")); // Prefer consecutive. > + EXPECT_THAT("printf", matches("s[printf]")); > + EXPECT_THAT("str", matches("o[str]eam")); > +} > + > +struct RankMatcher : public testing::MatcherInterface<StringRef> { > + std::vector<ExpectedMatch> RankedStrings; > + RankMatcher(std::initializer_list<ExpectedMatch> RankedStrings) > + : RankedStrings(RankedStrings) {} > + > + void DescribeTo(::std::ostream *OS) const override { > + raw_os_ostream O(*OS); > + O << "Ranks strings in order: ["; > + for (const auto &Str : RankedStrings) > + O << "\n\t" << Str; > + O << "\n]"; > + } > + > + bool MatchAndExplain(StringRef Pattern, > + testing::MatchResultListener *L) const override { > + std::unique_ptr<raw_ostream> OS( > + L->stream() ? (raw_ostream *)(new raw_os_ostream(*L->stream())) > + : new raw_null_ostream()); > + FuzzyMatcher Matcher(Pattern); > + const ExpectedMatch *LastMatch; > + Optional<float> LastScore; > + bool Ok = true; > + for (const auto &Str : RankedStrings) { > + auto Score = Matcher.match(Str.Word); > + if (!Score) { > + *OS << "\nDoesn't match '" << Str.Word << "'"; > + Matcher.dumpLast(*OS << "\n"); > + Ok = false; > + } else { > + std::string Buf; > + llvm::raw_string_ostream Info(Buf); > + auto AnnotatedMatch = Matcher.dumpLast(Info); > + > + if (AnnotatedMatch != Str.Annotated) { > + *OS << "\nMatched " << Str.Word << " as " << AnnotatedMatch > + << " instead of " << Str.Annotated << "\n" > + << Info.str(); > + Ok = false; > + } else if (LastScore && *LastScore < *Score) { > + *OS << "\nRanks '" << Str.Word << "'=" << *Score << " above '" > + << LastMatch->Word << "'=" << *LastScore << "\n" > + << Info.str(); > + Matcher.match(LastMatch->Word); > + Matcher.dumpLast(*OS << "\n"); > + Ok = false; > + } > + } > + LastMatch = &Str; > + LastScore = Score; > + } > + return Ok; > + } > +}; > + > +// Accepts patterns that match all the strings and rank them in the given > order. > +// Dumps the debug tables on match failure. > +template <typename... T> testing::Matcher<StringRef> ranks(T... > RankedStrings) { > + return testing::MakeMatcher<StringRef>( > + new RankMatcher{ExpectedMatch(RankedStrings)...}); > +} > + > +TEST(FuzzyMatch, Ranking) { > + EXPECT_THAT("eb", ranks("[e]mplace_[b]ack", "[e]m[b]ed")); > + EXPECT_THAT("cons", > + ranks("[cons]ole", "[Cons]ole", > "ArrayBuffer[Cons]tructor")); > + EXPECT_THAT("foo", ranks("[foo]", "[Foo]")); > + EXPECT_THAT("onMess", > + ranks("[onMess]age", "[onmess]age", > "[on]This[M]ega[Es]cape[s]")); > + EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase")); > + EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase")); > + EXPECT_THAT("p", ranks("[p]arse", "[p]osix", "[p]afdsa", "[p]ath", > "[p]")); > + EXPECT_THAT("pa", ranks("[pa]rse", "[pa]th", "[pa]fdsa")); > + EXPECT_THAT("log", ranks("[log]", "Scroll[Log]icalPosition")); > + EXPECT_THAT("e", ranks("[e]lse", "Abstract[E]lement")); > + EXPECT_THAT("workbench.sideb", > + ranks("[workbench.sideB]ar.location", > + "[workbench.]editor.default[SideB]ySideLayout")); > + EXPECT_THAT("editor.r", ranks("[editor.r]enderControlCharacter", > + "[editor.]overview[R]ulerlanes", > + "diff[Editor.r]enderSideBySide")); > + EXPECT_THAT("-mo", ranks("[-mo]z-columns", "[-]ms-ime-[mo]de")); > + EXPECT_THAT("convertModelPosition", > + ranks("[convertModelPosition]ToViewPosition", > + "[convert]ViewTo[ModelPosition]")); > + EXPECT_THAT("is", ranks("[is]ValidViewletId", "[i]mport [s]tatement")); > + EXPECT_THAT("title", ranks("window.[title]", > + "files.[t]r[i]m[T]rai[l]ingWhit[e]space")); > + EXPECT_THAT("strcpy", ranks("[strcpy]", "[strcpy]_s", "[str]n[cpy]")); > + EXPECT_THAT("close", ranks("workbench.quickOpen.[close]OnFocusOut", > + "[c]ss.[l]int.imp[o]rt[S]tat[e]ment", > + "[c]ss.co[lo]rDecorator[s].[e]nable")); > +} > + > +} // namespace > +} // namespace clangd > +} // namespace clang > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits