nridge updated this revision to Diff 248630.
nridge marked 16 inline comments as done.
nridge added a comment.
Rebase onto D75479 <https://reviews.llvm.org/D75479> and address most review
comments
Comments remaining to be addressed:
- revising the tests to exercise locateSymbolNamedTextuallyAt() directly
- comments related to fuzzy matching (I have an outstanding question about that)
Handling of dependent code has been deferred to a follow-up change
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D72874/new/
https://reviews.llvm.org/D72874
Files:
clang-tools-extra/clangd/FindSymbols.cpp
clang-tools-extra/clangd/FindSymbols.h
clang-tools-extra/clangd/XRefs.cpp
clang-tools-extra/clangd/XRefs.h
clang-tools-extra/clangd/unittests/XRefsTests.cpp
Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -596,6 +596,44 @@
}
}
+TEST(LocateSymbol, Textual) {
+ const char *Tests[] = {
+ R"cpp(// Comment
+ struct [[MyClass]] {};
+ // Comment mentioning M^yClass
+ )cpp",
+ R"cpp(// String
+ struct [[MyClass]] {};
+ const char* s = "String literal mentioning M^yClass";
+ )cpp",
+ R"cpp(// Ifdef'ed out code
+ struct [[MyClass]] {};
+ #ifdef WALDO
+ M^yClass var;
+ #endif
+ )cpp"};
+
+ for (const char *Test : Tests) {
+ Annotations T(Test);
+ llvm::Optional<Range> WantDecl;
+ if (!T.ranges().empty())
+ WantDecl = T.range();
+
+ auto TU = TestTU::withCode(T.code());
+
+ auto AST = TU.build();
+ auto Index = TU.index();
+ auto Results = locateSymbolAt(AST, T.point(), Index.get());
+
+ if (!WantDecl) {
+ EXPECT_THAT(Results, IsEmpty()) << Test;
+ } else {
+ ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+ EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+ }
+ }
+}
+
TEST(LocateSymbol, Ambiguous) {
auto T = Annotations(R"cpp(
struct Foo {
@@ -670,6 +708,26 @@
Sym("baz", T.range("StaticOverload2"))));
}
+TEST(LocateSymbol, TextualAmbiguous) {
+ auto T = Annotations(R"cpp(
+ struct Foo {
+ void $FooLoc[[uniqueMethodName]]();
+ };
+ struct Bar {
+ void $BarLoc[[uniqueMethodName]]();
+ };
+ // Will call u^niqueMethodName() on t.
+ template <typename T>
+ void f(T t);
+ )cpp");
+ auto TU = TestTU::withCode(T.code());
+ auto AST = TU.build();
+ auto Index = TU.index();
+ EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+ UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+ Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
TEST(LocateSymbol, TemplateTypedefs) {
auto T = Annotations(R"cpp(
template <class T> struct function {};
@@ -869,37 +927,37 @@
TEST(LocateSymbol, NearbyIdentifier) {
const char *Tests[] = {
- R"cpp(
+ R"cpp(
// regular identifiers (won't trigger)
int hello;
int y = he^llo;
)cpp",
- R"cpp(
+ R"cpp(
// disabled preprocessor sections
int [[hello]];
#if 0
int y = ^hello;
#endif
)cpp",
- R"cpp(
+ R"cpp(
// comments
// he^llo, world
int [[hello]];
)cpp",
- R"cpp(
+ R"cpp(
// string literals
int [[hello]];
const char* greeting = "h^ello, world";
)cpp",
- R"cpp(
+ R"cpp(
// can refer to macro invocations (even if they expand to nothing)
#define INT int
[[INT]] x;
// I^NT
)cpp",
- R"cpp(
+ R"cpp(
// prefer nearest occurrence
int hello;
int x = hello;
@@ -908,12 +966,12 @@
int z = hello;
)cpp",
- R"cpp(
+ R"cpp(
// short identifiers find near results
int [[hi]];
// h^i
)cpp",
- R"cpp(
+ R"cpp(
// short identifiers don't find far results
int hi;
@@ -922,13 +980,13 @@
// h^i
)cpp",
};
- for (const char* Test : Tests) {
+ for (const char *Test : Tests) {
Annotations T(Test);
auto AST = TestTU::withCode(T.code()).build();
const auto &SM = AST.getSourceManager();
llvm::Optional<Range> Nearby;
- if (const auto*Tok = findNearbyIdentifier(
- cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
+ if (const auto *Tok = findNearbyIdentifier(
+ cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
Tok->location(), Tok->endLocation()));
if (T.ranges().empty())
Index: clang-tools-extra/clangd/XRefs.h
===================================================================
--- clang-tools-extra/clangd/XRefs.h
+++ clang-tools-extra/clangd/XRefs.h
@@ -60,6 +60,21 @@
const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
const syntax::TokenBuffer &TB);
+// Tries to provide a textual fallback for locating a symbol referenced at
+// a location, by looking up the word under the cursor as a symbol name in the
+// index. The aim is to pick up references to symbols in contexts where
+// AST-based resolution does not work, such as comments, strings, and PP
+// disabled regions. The implementation takes a number of measures to avoid
+// false positives, such as looking for some signal that the word at the
+// given location is likely to be an identifier. The function does not
+// currently return results for locations that end up as real expanded
+// tokens, although this may be relaxed for e.g. dependent code in the future.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+ SourceLocation Loc,
+ const std::string &MainFilePath);
+
/// Get all document links
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -10,9 +10,11 @@
#include "CodeCompletionStrings.h"
#include "FindSymbols.h"
#include "FindTarget.h"
+#include "FuzzyMatch.h"
#include "Logger.h"
#include "ParsedAST.h"
#include "Protocol.h"
+#include "Quality.h"
#include "Selection.h"
#include "SourceCode.h"
#include "URI.h"
@@ -49,6 +51,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
namespace clang {
namespace clangd {
@@ -369,7 +372,7 @@
// Updates BestTok and BestCost if Tok is a good candidate.
// May return true if the cost is too high for this token.
auto Consider = [&](const syntax::Token &Tok) {
- if(!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
+ if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
return false;
// No point guessing the same location we started with.
if (Tok.location() == WordStart)
@@ -412,6 +415,145 @@
return BestTok;
}
+static bool isLikelyToBeIdentifier(StringRef Word) {
+ // Word contains underscore.
+ // This handles things like snake_case and MACRO_CASE.
+ if (Word.contains('_')) {
+ return true;
+ }
+ // Word contains capital letter other than at beginning.
+ // This handles things like lowerCamel and UpperCamel.
+ // The check for also containing a lowercase letter is to rule out
+ // initialisms like "HTTP".
+ bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
+ bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
+ if (HasLower && HasUpper) {
+ return true;
+ }
+ // FIXME: There are other signals we could listen for.
+ // Some of these require inspecting the surroundings of the word as well.
+ // - mid-sentence Capitalization
+ // - markup like quotes / backticks / brackets / "\p"
+ // - word has a qualifier (foo::bar)
+ return false;
+}
+
+using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
+struct ScoredSymbolGreater {
+ bool operator()(const ScoredLocatedSymbol &L,
+ const ScoredLocatedSymbol &R) const {
+ return L.first > R.first;
+ }
+};
+
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+ SourceLocation Loc,
+ const std::string &MainFilePath) {
+ const auto &SM = AST.getSourceManager();
+ FileID File;
+ unsigned Pos;
+ std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
+ llvm::StringRef Code = SM.getBufferData(File);
+ auto QueryString = wordTouching(Code, Pos);
+ if (!isLikelyToBeIdentifier(QueryString)) {
+ return {};
+ }
+
+ // If this is a real token that survived preprocessing, don't
+ // use the textual heuristic. This is to avoid false positives
+ // when over tokens that happen to correspond to an identifier
+ // name elsewhere.
+ // FIXME: Relax this for dependent code.
+ unsigned WordOffset = QueryString.data() - Code.data();
+ SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);
+ // If this is a real token that survived preprocessing, don't use heuristics.
+ auto WordExpandedTokens =
+ AST.getTokens().expandedTokens(SM.getMacroArgExpandedLocation(WordStart));
+ if (!WordExpandedTokens.empty())
+ return {};
+
+ FuzzyFindRequest Req;
+ Req.Query = QueryString.str();
+ Req.ProximityPaths = {MainFilePath};
+ Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());
+ // FIXME: For extra strictness, consider AnyScope=false.
+ Req.AnyScope = true;
+ // We limit the results to 3 further below. This limit is to avoid fetching
+ // too much data, while still likely having enough for 3 results to remain
+ // after additional filtering.
+ Req.Limit = 10;
+ TopN<ScoredLocatedSymbol, ScoredSymbolGreater> Top(*Req.Limit);
+ FuzzyMatcher Filter(Req.Query);
+ Index->fuzzyFind(Req, [&](const Symbol &Sym) {
+ auto MaybeDeclLoc =
+ symbolLocationToLocation(Sym.CanonicalDeclaration, MainFilePath);
+ if (!MaybeDeclLoc) {
+ log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError());
+ return;
+ }
+ Location DeclLoc = *MaybeDeclLoc;
+ Location DefLoc;
+ if (Sym.Definition) {
+ auto MaybeDefLoc = symbolLocationToLocation(Sym.Definition, MainFilePath);
+ if (!MaybeDefLoc) {
+ log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError());
+ return;
+ }
+ DefLoc = *MaybeDefLoc;
+ }
+ Location PreferredLoc = bool(Sym.Definition) ? DefLoc : DeclLoc;
+
+ // For now, only consider exact name matches, including case.
+ // This is to avoid too many false positives.
+ // We could relax this in the future if we make the query more accurate
+ // by other means.
+ if (Sym.Name != QueryString)
+ return;
+
+ // Exclude constructor results. They have the same name as the class,
+ // but we don't have enough context to prefer them over the class.
+ if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)
+ return;
+
+ std::string Scope = std::string(Sym.Scope);
+ llvm::StringRef ScopeRef = Scope;
+ ScopeRef.consume_back("::");
+ LocatedSymbol Located;
+ Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+ Located.PreferredDeclaration = DeclLoc;
+ Located.Definition = DefLoc;
+
+ SymbolQualitySignals Quality;
+ Quality.merge(Sym);
+ SymbolRelevanceSignals Relevance;
+ Relevance.Name = Sym.Name;
+ Relevance.Query = SymbolRelevanceSignals::Generic;
+ if (auto NameMatch = Filter.match(Sym.Name))
+ Relevance.NameMatch = *NameMatch;
+ else
+ return;
+ Relevance.merge(Sym);
+ auto Score =
+ evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
+ dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope,
+ Sym.Name, Score, Quality, Relevance);
+
+ Top.push({Score, std::move(Located)});
+ });
+ std::vector<LocatedSymbol> Result;
+ for (auto &Res : std::move(Top).items())
+ Result.push_back(std::move(Res.second));
+ // Assume we don't have results from the current file, otherwise the
+ // findNearbyIdentifier() mechanism would have handled them.
+ // If we have more than 3 results, and none from the current file, don't
+ // return anything, as confidence is too low.
+ // FIXME: Alternatively, try a stricter query?
+ if (Result.size() > 3)
+ return {};
+ return Result;
+}
+
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {
const auto &SM = AST.getSourceManager();
@@ -457,7 +599,7 @@
return ASTResults;
}
- return {};
+ return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
}
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
Index: clang-tools-extra/clangd/FindSymbols.h
===================================================================
--- clang-tools-extra/clangd/FindSymbols.h
+++ clang-tools-extra/clangd/FindSymbols.h
@@ -21,6 +21,10 @@
class ParsedAST;
class SymbolIndex;
+/// Helper function for deriving an LSP Location from a SymbolLocation.
+llvm::Expected<Location> symbolLocationToLocation(const SymbolLocation &Loc,
+ llvm::StringRef HintPath);
+
/// Helper function for deriving an LSP Location for a Symbol.
llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
llvm::StringRef HintPath);
Index: clang-tools-extra/clangd/FindSymbols.cpp
===================================================================
--- clang-tools-extra/clangd/FindSymbols.cpp
+++ clang-tools-extra/clangd/FindSymbols.cpp
@@ -18,6 +18,7 @@
#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -39,15 +40,13 @@
} // namespace
-llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
- llvm::StringRef HintPath) {
- // Prefer the definition over e.g. a function declaration in a header
- auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration;
- auto Path = URI::resolve(CD.FileURI, HintPath);
+llvm::Expected<Location> symbolLocationToLocation(const SymbolLocation &Loc,
+ llvm::StringRef HintPath) {
+ auto Path = URI::resolve(Loc.FileURI, HintPath);
if (!Path) {
return llvm::make_error<llvm::StringError>(
- formatv("Could not resolve path for symbol '{0}': {1}",
- Sym.Name, llvm::toString(Path.takeError())),
+ llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI,
+ llvm::toString(Path.takeError())),
llvm::inconvertibleErrorCode());
}
Location L;
@@ -55,14 +54,21 @@
// request.
L.uri = URIForFile::canonicalize(*Path, HintPath);
Position Start, End;
- Start.line = CD.Start.line();
- Start.character = CD.Start.column();
- End.line = CD.End.line();
- End.character = CD.End.column();
+ Start.line = Loc.Start.line();
+ Start.character = Loc.Start.column();
+ End.line = Loc.End.line();
+ End.character = Loc.End.column();
L.range = {Start, End};
return L;
}
+llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
+ llvm::StringRef HintPath) {
+ // Prefer the definition over e.g. a function declaration in a header
+ return symbolLocationToLocation(
+ Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, HintPath);
+}
+
llvm::Expected<std::vector<SymbolInformation>>
getWorkspaceSymbols(llvm::StringRef Query, int Limit,
const SymbolIndex *const Index, llvm::StringRef HintPath) {
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits