sammccall created this revision.
sammccall added a reviewer: nridge.
Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous,
MaskRay, ilya-biryukov.
Herald added a project: clang.
This is intended as a companion to (and is inspired by) D72874
<https://reviews.llvm.org/D72874> which attempts to
resolve these cases using the index.
The intent is we'd try this strategy after the AST-based approach but before the
index-based (I think local usages would be more reliable than index matches).
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D75479
Files:
clang-tools-extra/clangd/XRefs.cpp
clang-tools-extra/clangd/XRefs.h
clang-tools-extra/clangd/unittests/XRefsTests.cpp
Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -38,6 +38,7 @@
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::UnorderedElementsAreArray;
@@ -342,7 +343,7 @@
R"cpp(// Symbol concatenated inside macro (not supported)
int *pi;
#define POINTER(X) p ## X;
- int i = *POINTER(^i);
+ int x = *POINTER(^i);
)cpp",
R"cpp(// Forward class declaration
@@ -855,6 +856,88 @@
ElementsAre(Sym("foo", FooWithoutHeader.range())));
}
+TEST(LocateSymbol, NearbyTokenSmoke) {
+ auto T = Annotations(R"cpp(
+ // prints e^rr and crashes
+ void die(const char* [[err]]);
+ )cpp");
+ auto AST = TestTU::withCode(T.code()).build();
+ // We don't pass an index, so can't hit index-based fallback.
+ EXPECT_THAT(locateSymbolAt(AST, T.point()),
+ ElementsAre(Sym("err", T.range())));
+}
+
+TEST(LocateSymbol, NearbyIdentifier) {
+ const char *Tests[] = {
+ R"cpp(
+ // regular identifiers (won't trigger)
+ int hello;
+ int y = he^llo;
+ )cpp",
+ R"cpp(
+ // disabled preprocessor sections
+ int [[hello]];
+ #if 0
+ int y = ^hello;
+ #endif
+ )cpp",
+ R"cpp(
+ // comments
+ // he^llo, world
+ int [[hello]];
+ )cpp",
+ R"cpp(
+ // string literals
+ int [[hello]];
+ const char* greeting = "h^ello, world";
+ )cpp",
+
+ R"cpp(
+ // can refer to macro invocations (even if they expand to nothing)
+ #define INT int
+ [[INT]] x;
+ // I^NT
+ )cpp",
+
+ R"cpp(
+ // prefer nearest occurrence
+ int hello;
+ int x = hello;
+ // h^ello
+ int y = [[hello]];
+ int z = hello;
+ )cpp",
+
+ R"cpp(
+ // short identifiers find near results
+ int [[hi]];
+ // h^i
+ )cpp",
+ R"cpp(
+ // short identifiers don't find far results
+ int hi;
+
+
+
+ // h^i
+ )cpp",
+ };
+ for (const char* Test : Tests) {
+ Annotations T(Test);
+ auto AST = TestTU::withCode(T.code()).build();
+ const auto &SM = AST.getSourceManager();
+ llvm::Optional<Range> Nearby;
+ if (const auto*Tok = findNearbyIdentifier(
+ cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
+ Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
+ Tok->location(), Tok->endLocation()));
+ if (T.ranges().empty())
+ EXPECT_THAT(Nearby, Eq(llvm::None)) << Test;
+ else
+ EXPECT_THAT(Nearby, T.range()) << Test;
+ }
+}
+
TEST(FindReferences, WithinAST) {
const char *Tests[] = {
R"cpp(// Local variable
Index: clang-tools-extra/clangd/XRefs.h
===================================================================
--- clang-tools-extra/clangd/XRefs.h
+++ clang-tools-extra/clangd/XRefs.h
@@ -26,6 +26,10 @@
#include <vector>
namespace clang {
+namespace syntax {
+class Token;
+class TokenBuffer;
+} // namespace syntax
namespace clangd {
class ParsedAST;
@@ -49,6 +53,13 @@
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index = nullptr);
+// If SpellingLoc points at a "word" that does not correspond to an expanded
+// token (e.g. in a comment, a string, or a PP disabled region), then try to
+// find a close occurrence of that word that does.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+ const syntax::TokenBuffer &TB);
+
/// Get all document links
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -28,6 +28,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/Type.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
@@ -44,6 +45,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -313,6 +315,100 @@
return Result;
}
+static bool tokenSpelledAt(SourceLocation SpellingLoc,
+ const syntax::TokenBuffer &TB) {
+ auto ExpandedTokens = TB.expandedTokens(
+ TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc));
+ return !ExpandedTokens.empty();
+}
+
+static llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
+ unsigned B = Offset, E = Offset;
+ while (B > 0 && isIdentifierBody(Code[B - 1]))
+ --B;
+ while (E < Code.size() && isIdentifierBody(Code[E]))
+ ++E;
+ return Code.slice(B, E);
+}
+
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+ const syntax::TokenBuffer &TB) {
+ const SourceManager &SM = TB.sourceManager();
+ auto Pos = SM.getDecomposedLoc(SpellingLoc);
+ llvm::StringRef Code = SM.getBufferData(Pos.first);
+ llvm::StringRef Word = wordTouching(Code, Pos.second);
+ if (Word.empty())
+ return nullptr;
+ unsigned WordOffset = Word.data() - Code.data();
+ SourceLocation WordStart = SM.getComposedLoc(Pos.first, WordOffset);
+ // If this is a real token that survived preprocessing, don't use heuristics.
+ auto WordExpandedTokens =
+ TB.expandedTokens(SM.getMacroArgExpandedLocation(WordStart));
+ if (!WordExpandedTokens.empty())
+ return nullptr;
+
+ // We prefer the closest possible token, line-wise. Backwards is penalized.
+ // Ties are implicitly broken by traversal order (first-one-wins).
+ unsigned WordLine = SM.getLineNumber(Pos.first, WordOffset);
+ auto Cost = [&](SourceLocation Loc) -> unsigned {
+ assert(SM.getFileID(Loc) == Pos.first && "spelled token in wrong file?");
+ unsigned Line = SM.getLineNumber(Pos.first, SM.getFileOffset(Loc));
+ if (Line > WordLine)
+ return 1 + llvm::Log2_64(Line - WordLine);
+ if (Line < WordLine)
+ return 2 + llvm::Log2_64(WordLine - Line);
+ return 0;
+ };
+ const syntax::Token *BestTok = nullptr;
+ // Search bounds are based on word length: 2^N lines forward.
+ unsigned BestCost = Word.size() + 1;
+
+ // Updates BestTok and BestCost if Tok is a good candidate.
+ // May return true if the cost is too high for this token.
+ auto Consider = [&](const syntax::Token &Tok) {
+ if(!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
+ return false;
+ // No point guessing the same location we started with.
+ if (Tok.location() == WordStart)
+ return false;
+ // We've done cheap checks, compute cost so we can break the caller's loop.
+ unsigned TokCost = Cost(Tok.location());
+ if (TokCost >= BestCost)
+ return true; // causes the outer loop to break.
+ // Allow locations that might be part of the AST, and macros (even if empty)
+ // but not things like disabled preprocessor sections.
+ if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
+ return false;
+ // We already verified this token is an improvement.
+ BestCost = TokCost;
+ BestTok = &Tok;
+ return false;
+ };
+ auto SpelledTokens = TB.spelledTokens(Pos.first);
+ // Find where the word occurred in the token stream, to search forward & back.
+ auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
+ assert(SM.getFileID(T.location()) == SM.getFileID(WordStart));
+ return T.location() >= WordStart; // Comparison OK: same file.
+ });
+ // Search for matches after the cursor.
+ for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
+ if (Consider(Tok))
+ break; // costs of later tokens are greater...
+ // Search for matches before the cursor.
+ for (const syntax::Token &Tok :
+ llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
+ if (Consider(Tok))
+ break;
+
+ if (BestTok)
+ vlog(
+ "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
+ Word, WordStart.printToString(SM),
+ BestTok->location().printToString(SM));
+
+ return BestTok;
+}
+
std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
const SymbolIndex *Index) {
const auto &SM = AST.getSourceManager();
@@ -343,8 +439,22 @@
// expansion.)
return {*std::move(Macro)};
- return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath,
- Index);
+ auto ASTResults =
+ locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index);
+ if (!ASTResults.empty())
+ return ASTResults;
+
+ if (const syntax::Token *NearbyIdent =
+ findNearbyIdentifier(*CurLoc, AST.getTokens())) {
+ if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath))
+ return {*std::move(Macro)};
+ ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
+ *MainFilePath, Index);
+ if (!ASTResults.empty())
+ return ASTResults;
+ }
+
+ return {};
}
std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits