[PATCH] D75479: [clangd] go-to-def on names in comments etc that are used nearby.

Sam McCall via Phabricator via cfe-commits Mon, 02 Mar 2020 13:46:07 -0800

sammccall created this revision.
sammccall added a reviewer: nridge.
Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous, 
MaskRay, ilya-biryukov.
Herald added a project: clang.


This is intended as a companion to (and is inspired by) D72874 
<https://reviews.llvm.org/D72874> which attempts to
resolve these cases using the index.
The intent is that we'd try this strategy after the AST-based approach but before the
index-based one (I think local usages would be more reliable than index matches).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75479

Files:
  clang-tools-extra/clangd/XRefs.cpp
  clang-tools-extra/clangd/XRefs.h
  clang-tools-extra/clangd/unittests/XRefsTests.cpp

Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -38,6 +38,7 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::Eq;
 using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::UnorderedElementsAreArray;
@@ -342,7 +343,7 @@
       R"cpp(// Symbol concatenated inside macro (not supported)
        int *pi;
        #define POINTER(X) p ## X;
-       int i = *POINTER(^i);
+       int x = *POINTER(^i);
       )cpp",
 
       R"cpp(// Forward class declaration
@@ -855,6 +856,88 @@
       ElementsAre(Sym("foo", FooWithoutHeader.range())));
 }
 
+TEST(LocateSymbol, NearbyTokenSmoke) {
+  Annotations Code(R"cpp(
+    // prints e^rr and crashes
+    void die(const char* [[err]]);
+  )cpp");
+  auto AST = TestTU::withCode(Code.code()).build();
+  // No index is passed here, so the index-based fallback cannot be hit.
+  auto Results = locateSymbolAt(AST, Code.point());
+  EXPECT_THAT(Results, ElementsAre(Sym("err", Code.range())));
+}
+
+TEST(LocateSymbol, NearbyIdentifier) {
+  // Each case marks the cursor with ^; [[...]] marks the expected token.
+  // Cases without a [[...]] range expect no nearby identifier to be found.
+  const char *Cases[] = {
+    R"cpp(
+      // regular identifiers (won't trigger)
+      int hello;
+      int y = he^llo;
+    )cpp",
+    R"cpp(
+      // disabled preprocessor sections
+      int [[hello]];
+      #if 0
+      int y = ^hello;
+      #endif
+    )cpp",
+    R"cpp(
+      // comments
+      // he^llo, world
+      int [[hello]];
+    )cpp",
+    R"cpp(
+      // string literals
+      int [[hello]];
+      const char* greeting = "h^ello, world";
+    )cpp",
+    R"cpp(
+      // can refer to macro invocations (even if they expand to nothing)
+      #define INT int
+      [[INT]] x;
+      // I^NT
+    )cpp",
+    R"cpp(
+      // prefer nearest occurrence
+      int hello;
+      int x = hello;
+      // h^ello
+      int y = [[hello]];
+      int z = hello;
+    )cpp",
+    R"cpp(
+      // short identifiers find near results
+      int [[hi]];
+      // h^i
+    )cpp",
+    R"cpp(
+      // short identifiers don't find far results
+      int hi;
+
+
+
+      // h^i
+    )cpp",
+  };
+  for (const char *Case : Cases) {
+    Annotations Input(Case);
+    auto AST = TestTU::withCode(Input.code()).build();
+    const auto &SM = AST.getSourceManager();
+    auto Cursor = cantFail(sourceLocationInMainFile(SM, Input.point()));
+    // Convert any token found into a Range comparable with the annotation.
+    llvm::Optional<Range> Found;
+    if (const auto *Tok = findNearbyIdentifier(Cursor, AST.getTokens()))
+      Found = halfOpenToRange(SM, CharSourceRange::getCharRange(
+                                      Tok->location(), Tok->endLocation()));
+    if (Input.ranges().empty())
+      EXPECT_THAT(Found, Eq(llvm::None)) << Case;
+    else
+      EXPECT_THAT(Found, Input.range()) << Case;
+  }
+}
+
 TEST(FindReferences, WithinAST) {
   const char *Tests[] = {
       R"cpp(// Local variable
Index: clang-tools-extra/clangd/XRefs.h
===================================================================
--- clang-tools-extra/clangd/XRefs.h
+++ clang-tools-extra/clangd/XRefs.h
@@ -26,6 +26,10 @@
 #include <vector>
 
 namespace clang {
+namespace syntax {
+class Token;
+class TokenBuffer;
+} // namespace syntax
 namespace clangd {
 class ParsedAST;
 
@@ -49,6 +53,13 @@
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index = nullptr);
 
+// If SpellingLoc points at a "word" that does not correspond to an expanded
+// token (e.g. in a comment, a string, or a PP disabled region), then try to
+// find a close occurrence of that word that does. Returns nullptr if none.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+                                          const syntax::TokenBuffer &TB);
+
 /// Get all document links
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
 
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -28,6 +28,7 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
@@ -44,6 +45,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -313,6 +315,100 @@
   return Result;
 }
 
+static bool tokenSpelledAt(SourceLocation SpellingLoc,
+                           const syntax::TokenBuffer &TB) {
+  // Reports whether TB has expanded tokens at the macro-arg-expanded location.
+  auto Loc = TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc);
+  return !TB.expandedTokens(Loc).empty();
+}
+
+static llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
+  // Grow [Begin, End) outwards from Offset across identifier characters.
+  unsigned Begin = Offset;
+  for (; Begin > 0 && isIdentifierBody(Code[Begin - 1]); --Begin) {}
+  unsigned End = Offset;
+  for (; End < Code.size() && isIdentifierBody(Code[End]); ++End) {}
+  return Code.slice(Begin, End);
+}
+
+// Maps the word touching SpellingLoc to a nearby spelled identifier token with
+// the same text, preferring close lines. Returns nullptr if there is none.
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+                                          const syntax::TokenBuffer &TB) {
+  const SourceManager &SM = TB.sourceManager();
+  auto Pos = SM.getDecomposedLoc(SpellingLoc);
+  llvm::StringRef Code = SM.getBufferData(Pos.first);
+  llvm::StringRef Word = wordTouching(Code, Pos.second);
+  if (Word.empty())
+    return nullptr;
+  unsigned WordOffset = Word.data() - Code.data();
+  SourceLocation WordStart = SM.getComposedLoc(Pos.first, WordOffset);
+  // If this is a real token that survived preprocessing, don't use heuristics.
+  if (tokenSpelledAt(WordStart, TB))
+    return nullptr;
+
+  // We prefer the closest possible token, line-wise. Backwards is penalized.
+  // Ties are implicitly broken by traversal order (first-one-wins).
+  unsigned WordLine = SM.getLineNumber(Pos.first, WordOffset);
+  auto Cost = [&](SourceLocation Loc) -> unsigned {
+    assert(SM.getFileID(Loc) == Pos.first && "spelled token in wrong file?");
+    unsigned Line = SM.getLineNumber(Pos.first, SM.getFileOffset(Loc));
+    if (Line > WordLine)
+      return 1 + llvm::Log2_64(Line - WordLine);
+    if (Line < WordLine)
+      return 2 + llvm::Log2_64(WordLine - Line);
+    return 0;
+  };
+  const syntax::Token *BestTok = nullptr;
+  // Search bounds are based on word length: 2^N lines forward.
+  unsigned BestCost = Word.size() + 1;
+
+  // Updates BestTok and BestCost if Tok is a good candidate.
+  // Returns true if the cost is too high for this token (caller should stop).
+  auto Consider = [&](const syntax::Token &Tok) {
+    if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
+      return false;
+    // No point guessing the same location we started with.
+    if (Tok.location() == WordStart)
+      return false;
+    // We've done cheap checks, compute cost so we can break the caller's loop.
+    unsigned TokCost = Cost(Tok.location());
+    if (TokCost >= BestCost)
+      return true; // causes the outer loop to break.
+    // Allow locations that might be part of the AST, and macros (even if empty)
+    // but not things like disabled preprocessor sections.
+    if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
+      return false;
+    // We already verified this token is an improvement.
+    BestCost = TokCost;
+    BestTok = &Tok;
+    return false;
+  };
+  auto SpelledTokens = TB.spelledTokens(Pos.first);
+  // Find the first token at or after the word, to search forward & back.
+  auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
+    assert(SM.getFileID(T.location()) == SM.getFileID(WordStart));
+    return T.location() < WordStart; // Comparison OK: same file.
+  });
+  // Search for matches after the cursor.
+  for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
+    if (Consider(Tok))
+      break; // costs of later tokens are greater...
+  // Search for matches before the cursor.
+  for (const syntax::Token &Tok :
+       llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
+    if (Consider(Tok))
+      break;
+
+  if (BestTok)
+    vlog(
+        "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
+        Word, WordStart.printToString(SM),
+        BestTok->location().printToString(SM));
+
+  return BestTok;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -343,8 +439,22 @@
       // expansion.)
       return {*std::move(Macro)};
 
-  return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath,
-                           Index);
+  auto ASTResults =
+      locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index);
+  if (!ASTResults.empty())
+    return ASTResults;
+
+  if (const syntax::Token *NearbyIdent =
+          findNearbyIdentifier(*CurLoc, AST.getTokens())) {
+    if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath))
+      return {*std::move(Macro)};
+    ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
+                                   *MainFilePath, Index);
+    if (!ASTResults.empty())
+      return ASTResults;
+  }
+
+  return {};
 }
 
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75479: [clangd] go-to-def on names in comments etc that are used nearby.

Reply via email to