usaxena95 updated this revision to Diff 262852. usaxena95 added a comment. - Added DerivedSignals struct containing all the derived signals. - Added NameMatchesContext and proximtiy signals to this struct. - We need to call computeDerivedSignals() before calling evaluate() if we set non-concrete utilites (e.g. ContextWords and Name). - This is logically equivalent to the previous version (both when the utilites are explicitly set and when the default signals are used). - The utilites are not marked as null in computeDerivedSignals. This is due to 2 reasons: - Current scoring function checks whether `ScopeProximityMatch` is set or not to decide whether to multiply with scopeProxitiyScore. Possible solutions: - Have scopeProxitiyScore as a derived signal itself. - Or have a different derived signal `HasScopeProximityMatch`. - Having these utilities available for debug purposes is great. We can try to compute other derived signals (e.g. ContextMatchesName) and test out it's value without even adding them concretely to clangd. Once their value is justified, we can add it to Quality/Relevance signals.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D79500/new/ https://reviews.llvm.org/D79500 Files: clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/FindSymbols.cpp clang-tools-extra/clangd/Quality.cpp clang-tools-extra/clangd/Quality.h clang-tools-extra/clangd/Quality.h.rej clang-tools-extra/clangd/XRefs.cpp clang-tools-extra/clangd/index/dex/Dex.cpp clang-tools-extra/clangd/unittests/QualityTests.cpp
Index: clang-tools-extra/clangd/unittests/QualityTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/QualityTests.cpp +++ clang-tools-extra/clangd/unittests/QualityTests.cpp @@ -268,9 +268,11 @@ ProxSources.try_emplace(testPath("foo/baz.h")); URIDistance Distance(ProxSources); IndexProximate.FileProximityMatch = &Distance; + IndexProximate.calculateDerivedSignals(); EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); SymbolRelevanceSignals IndexDistant = IndexProximate; IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + IndexDistant.calculateDerivedSignals(); EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) << IndexProximate << IndexDistant; EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); @@ -294,13 +296,17 @@ EXPECT_LT(InBaseClass.evaluate(), Default.evaluate()); llvm::StringSet<> Words = {"one", "two", "three"}; + SymbolRelevanceSignals WithoutMatchingWord; WithoutMatchingWord.ContextWords = &Words; WithoutMatchingWord.Name = "four"; + WithoutMatchingWord.calculateDerivedSignals(); EXPECT_EQ(WithoutMatchingWord.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithMatchingWord; WithMatchingWord.ContextWords = &Words; WithMatchingWord.Name = "TheTwoTowers"; + WithMatchingWord.calculateDerivedSignals(); EXPECT_GT(WithMatchingWord.evaluate(), Default.evaluate()); } @@ -310,25 +316,31 @@ Relevance.ScopeProximityMatch = &ScopeProximity; Relevance.SymbolScope = "other::"; + Relevance.calculateDerivedSignals(); float NotMatched = Relevance.evaluate(); Relevance.SymbolScope = ""; + Relevance.calculateDerivedSignals(); float Global = Relevance.evaluate(); EXPECT_GT(Global, NotMatched); Relevance.SymbolScope = "llvm::"; + Relevance.calculateDerivedSignals(); float NonParent = Relevance.evaluate(); EXPECT_GT(NonParent, Global); Relevance.SymbolScope = "x::"; + Relevance.calculateDerivedSignals(); float GrandParent = Relevance.evaluate(); EXPECT_GT(GrandParent, Global); Relevance.SymbolScope = "x::y::"; + Relevance.calculateDerivedSignals(); float Parent = Relevance.evaluate(); EXPECT_GT(Parent, GrandParent); Relevance.SymbolScope = "x::y::z::"; + Relevance.calculateDerivedSignals(); float Enclosing = Relevance.evaluate(); EXPECT_GT(Enclosing, Parent); } Index: clang-tools-extra/clangd/index/dex/Dex.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Dex.cpp +++ clang-tools-extra/clangd/index/dex/Dex.cpp @@ -126,7 +126,6 @@ // DistanceCalculator will find the shortest distance from ProximityPaths to // any URI extracted from the ProximityPaths. URIDistance DistanceCalculator(Sources); - PathProximitySignals.FileProximityMatch = &DistanceCalculator; // Try to build BOOST iterator for each Proximity Path provided by // ProximityPaths. Boosting factor should depend on the distance to the // Proximity Path: the closer processed path is, the higher boosting factor. @@ -134,7 +133,9 @@ // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator. auto It = iterator(Token(Token::Kind::ProximityURI, ParentURI)); if (It->kind() != Iterator::Kind::False) { + PathProximitySignals.FileProximityMatch = &DistanceCalculator; PathProximitySignals.SymbolURI = ParentURI; + PathProximitySignals.calculateDerivedSignals(); BoostingIterators.push_back( Corpus.boost(std::move(It), PathProximitySignals.evaluate())); } Index: clang-tools-extra/clangd/XRefs.cpp =================================================================== --- clang-tools-extra/clangd/XRefs.cpp +++ clang-tools-extra/clangd/XRefs.cpp @@ -427,7 +427,6 @@ SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; - Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; Relevance.merge(Sym); auto Score = Index: clang-tools-extra/clangd/Quality.h.rej =================================================================== --- /dev/null +++ clang-tools-extra/clangd/Quality.h.rej @@ -0,0 +1,12 @@ +--- third_party/llvm/llvm-project/clang-tools-extra/clangd/Quality.h ++++ third_party/llvm/llvm-project/clang-tools-extra/clangd/Quality.h +@@ -85,9 +85,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ + + /// Attributes of a symbol-query pair that affect how much we like it. + struct SymbolRelevanceSignals { +- /// Whether Name contains some word in context. Must not be set explicitly. +- /// Computed by calcNameMatchesContext(). +- bool NameMatchesContext = false; + /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. + float NameMatch = 1; + Index: clang-tools-extra/clangd/Quality.h =================================================================== --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -85,29 +85,19 @@ /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { - /// The name of the symbol (for ContextWords). Must be explicitly assigned. - llvm::StringRef Name; /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; - /// Lowercase words relevant to the context (e.g. near the completion point). - llvm::StringSet<>* ContextWords = nullptr; + bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). /// Whether fixits needs to be applied for that completion or not. bool NeedsFixIts = false; bool InBaseClass = false; // A member from base class of the accessed class. - URIDistance *FileProximityMatch = nullptr; - /// These are used to calculate proximity between the index symbol and the - /// query. - llvm::StringRef SymbolURI; /// FIXME: unify with index proximity score - signals should be /// source-independent. /// Proximity between best declaration and the query. [0-1], 1 is closest. float SemaFileProximityScore = 0; - // Scope proximity is only considered (both index and sema) when this is set. - ScopeDistance *ScopeProximityMatch = nullptr; - llvm::Optional<llvm::StringRef> SymbolScope; // A symbol from sema should be accessible from the current scope. bool SemaSaysInScope = false; @@ -136,6 +126,34 @@ // Whether the item matches the type expected in the completion context. bool TypeMatchesPreferred = false; + // Properties and utilites used to compute derived signals. These are ignored + // by a scoring function. Must be explicitly assigned. + llvm::StringRef SymbolURI; + URIDistance *FileProximityMatch = nullptr; + /// These are used to calculate proximity between the index symbol and the + /// query. + llvm::Optional<llvm::StringRef> SymbolScope; + // Scope proximity is only considered (both index and sema) when this is set. + ScopeDistance *ScopeProximityMatch = nullptr; + /// The name of the symbol (for ContextWords). + llvm::StringRef Name; + /// Lowercase words relevant to the context (e.g. near the completion + /// point). + llvm::StringSet<> *ContextWords = nullptr; + + /// Set of derived signals computed by calculateDerivedSignals(). Must not be + /// set explicitly. + struct DerivedSignals { + /// Whether Name contains some word from context. + bool NameMatchesContext = false; + /// Min distance between SymbolURI and all the headers included by the TU. + unsigned FileProximityDistance = FileDistance::Unreachable; + /// Min distance between SymbolScope and all the available scopes. + unsigned ScopeProximityDistance = FileDistance::Unreachable; + } Derived; + + void calculateDerivedSignals(); + void merge(const CodeCompletionResult &SemaResult); void merge(const Symbol &IndexResult); Index: clang-tools-extra/clangd/Quality.cpp =================================================================== --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -320,23 +320,23 @@ NeedsFixIts = !SemaCCResult.FixIts.empty(); } -static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI, - URIDistance *D) { - if (!D || SymbolURI.empty()) - return {0.f, 0u}; - unsigned Distance = D->distance(SymbolURI); +static float fileProximityScore(unsigned FileDistance) { + // Range: [0, 1] + // FileDistance = [0, 1, 2, 3, 4, .., FileDistance::Unreachable] + // Score = [1, 0.82, 0.67, 0.55, 0.45, .., 0] + if (FileDistance == FileDistance::Unreachable) + return 0; // Assume approximately default options are used for sensible scoring. - return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; + return std::exp(FileDistance * -0.4f / FileDistanceOptions().UpCost); } -static float scopeBoost(ScopeDistance &Distance, - llvm::Optional<llvm::StringRef> SymbolScope) { - if (!SymbolScope) - return 1; - auto D = Distance.distance(*SymbolScope); - if (D == FileDistance::Unreachable) +static float scopeProximityScore(unsigned ScopeDistance) { + // Range: [0.6, 2]. + // ScopeDistance = [0, 1, 2, 3, 4, 5, 6, 7, .., FileDistance::Unreachable] + // Score = [2.0, 1.55, 1.2, 0.93, 0.72, 0.65, 0.65, 0.65, .., 0.6] + if (ScopeDistance == FileDistance::Unreachable) return 0.6f; - return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); + return std::max(0.65, 2.0 * std::pow(0.6, ScopeDistance / 2.0)); } static llvm::Optional<llvm::StringRef> @@ -348,6 +348,18 @@ return llvm::None; } +void SymbolRelevanceSignals::calculateDerivedSignals() { + Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue(); + Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty() + ? FileDistance::Unreachable + : FileProximityMatch->distance(SymbolURI); + if (ScopeProximityMatch) { + // For global symbol, the distance is 0. + Derived.ScopeProximityDistance = + SymbolScope ? ScopeProximityMatch->distance(*SymbolScope) : 0; + } +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -358,7 +370,7 @@ // File proximity scores are [0,1] and we translate them into a multiplier in // the range from 1 to 3. - Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first, + Score *= 1 + 2 * std::max(fileProximityScore(Derived.FileProximityDistance), SemaFileProximityScore); if (ScopeProximityMatch) @@ -366,10 +378,11 @@ // can be tricky (e.g. class/function scope). Set to the max boost as we // don't load top-level symbols from the preamble and sema results are // always in the accessible scope. - Score *= - SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); + Score *= SemaSaysInScope + ? 2.0 + : scopeProximityScore(Derived.ScopeProximityDistance); - if (wordMatching(Name, ContextWords)) + if (Derived.NameMatchesContext) Score *= 1.5; // Symbols like local variables may only be referenced within their scope. @@ -446,16 +459,16 @@ S.SymbolScope ? *S.SymbolScope : "<None>"); if (S.FileProximityMatch) { - auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch); - OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", - Score.first, Score.second); + unsigned Score = fileProximityScore(S.Derived.FileProximityDistance); + OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", Score, + S.Derived.FileProximityDistance); } OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore); OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope); if (S.ScopeProximityMatch) OS << llvm::formatv("\tIndex scope boost: {0}\n", - scopeBoost(*S.ScopeProximityMatch, S.SymbolScope)); + scopeProximityScore(S.Derived.ScopeProximityDistance)); OS << llvm::formatv( "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n", Index: clang-tools-extra/clangd/FindSymbols.cpp =================================================================== --- clang-tools-extra/clangd/FindSymbols.cpp +++ clang-tools-extra/clangd/FindSymbols.cpp @@ -109,7 +109,6 @@ SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; - Relevance.Name = Sym.Name; Relevance.Query = SymbolRelevanceSignals::Generic; if (auto NameMatch = Filter.match(Sym.Name)) Relevance.NameMatch = *NameMatch; Index: clang-tools-extra/clangd/CodeComplete.cpp =================================================================== --- clang-tools-extra/clangd/CodeComplete.cpp +++ clang-tools-extra/clangd/CodeComplete.cpp @@ -1612,13 +1612,14 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Context = CCContextKind; - Relevance.Name = Bundle.front().Name; Relevance.Query = SymbolRelevanceSignals::CodeComplete; Relevance.FileProximityMatch = FileProximity.getPointer(); if (ScopeProximity) Relevance.ScopeProximityMatch = ScopeProximity.getPointer(); if (PreferredType) Relevance.HadContextType = true; + + Relevance.Name = Bundle.front().Name; Relevance.ContextWords = &ContextWords; auto &First = Bundle.front(); @@ -1663,6 +1664,7 @@ CodeCompletion::Scores Scores; Scores.Quality = Quality.evaluate(); + Relevance.calculateDerivedSignals(); Scores.Relevance = Relevance.evaluate(); Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance); // NameMatch is in fact a multiplier on total score, so rescoring is sound.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits