sammccall created this revision.
sammccall added reviewers: kadircet, hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added a project: clang-tools-extra.
Herald added a subscriber: cfe-commits.

The recorded macro occurrences are roots for finding used headers, playing the
same role as the AST roots fed to walkAST.
The recorded includes are the targets that used headers are matched against.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D136723

Files:
  clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h
  clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
  clang-tools-extra/include-cleaner/lib/Record.cpp
  clang-tools-extra/include-cleaner/lib/Types.cpp
  clang-tools-extra/include-cleaner/unittests/RecordTest.cpp

Index: clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
===================================================================
--- clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -8,13 +8,19 @@
 
 #include "clang-include-cleaner/Record.h"
 #include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/FrontendActions.h"
 #include "clang/Testing/TestAST.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Annotations.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang::include_cleaner {
 namespace {
+using testing::ElementsAre;
+using testing::ElementsAreArray;
 
 // Matches a Decl* if it is a NamedDecl with the given name.
 MATCHER_P(Named, N, "") {
@@ -88,5 +94,145 @@
   EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("x")));
 }
 
+class RecordPPTest : public ::testing::Test {
+protected:
+  TestInputs Inputs;
+  RecordedPP Recorded;
+
+  RecordPPTest() {
+    struct RecordAction : public PreprocessOnlyAction {
+      RecordedPP &Out;
+      RecordAction(RecordedPP &Out) : Out(Out) {}
+
+      void ExecuteAction() override {
+        auto &PP = getCompilerInstance().getPreprocessor();
+        PP.addPPCallbacks(Out.record(PP));
+        PreprocessOnlyAction::ExecuteAction();
+      }
+    };
+    Inputs.MakeAction = [this] {
+      return std::make_unique<RecordAction>(Recorded);
+    };
+  }
+
+  TestAST build() { return TestAST(Inputs); }
+};
+
+// Matches an Include with a particular spelling.
+MATCHER_P(spelled, S, "") { return arg.Spelled == S; }
+
+TEST_F(RecordPPTest, CapturesIncludes) {
+  llvm::Annotations MainFile(R"cpp(
+    $H^#include "./header.h"
+    $M^#include "missing.h"
+  )cpp");
+  Inputs.Code = MainFile.code();
+  Inputs.ExtraFiles["header.h"] = "";
+  Inputs.ErrorOK = true; // missing header
+  auto AST = build();
+
+  ASSERT_THAT(
+      Recorded.Includes.all(),
+      testing::ElementsAre(spelled("./header.h"), spelled("missing.h")));
+
+  auto &H = Recorded.Includes.all().front();
+  EXPECT_EQ(H.Line, 2u);
+  EXPECT_EQ(H.Location,
+            AST.sourceManager().getComposedLoc(
+                AST.sourceManager().getMainFileID(), MainFile.point("H")));
+  EXPECT_EQ(H.Resolved, AST.fileManager().getFile("header.h").get());
+
+  auto &M = Recorded.Includes.all().back();
+  EXPECT_EQ(M.Line, 3u);
+  EXPECT_EQ(M.Location,
+            AST.sourceManager().getComposedLoc(
+                AST.sourceManager().getMainFileID(), MainFile.point("M")));
+  EXPECT_EQ(M.Resolved, nullptr);
+}
+
+TEST_F(RecordPPTest, CapturesMacroRefs) {
+  llvm::Annotations Header(R"cpp(
+    #define $def^X 1
+
+    // Refs, but not in main file.
+    #define Y X
+    int one = X;
+  )cpp");
+  llvm::Annotations MainFile(R"cpp(
+    #define EARLY X // not a ref, no definition
+    #include "header.h"
+    #define LATE ^X
+    #define LATE2 ^X // a ref even if not expanded
+
+    int one = ^X;
+    int uno = $exp^LATE; // a ref in LATE's expansion
+
+    #define IDENT(X) X // not a ref, shadowed
+    int eins = IDENT(^X);
+
+    #undef ^X
+    // Not refs, rather a new macro with the same name.
+    #define X 2
+    int two = X;
+  )cpp");
+  Inputs.Code = MainFile.code();
+  Inputs.ExtraFiles["header.h"] = Header.code();
+  Inputs.ErrorOK = true; // missing header
+  auto AST = build();
+  const auto &SM = AST.sourceManager();
+
+  SourceLocation Def = SM.getComposedLoc(
+      SM.translateFile(AST.fileManager().getFile("header.h").get()),
+      Header.point("def"));
+  ASSERT_NE(Recorded.MacroReferences.size(), 0u);
+  Symbol OrigX = Recorded.MacroReferences.front().Symbol;
+  EXPECT_EQ("X", OrigX.macro().Name->getName());
+  EXPECT_EQ(Def, OrigX.macro().Definition);
+
+  std::vector<unsigned> RefOffsets;
+  std::vector<unsigned> ExpOffsets; // Expansion locs of refs in macro locs.
+  std::vector<SourceLocation> RefMacroLocs;
+  for (const auto &Ref : Recorded.MacroReferences) {
+    if (Ref.Symbol == OrigX) {
+      auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation);
+      if (FID == SM.getMainFileID()) {
+        RefOffsets.push_back(Off);
+      } else if (Ref.RefLocation.isMacroID() &&
+                 SM.isWrittenInMainFile(SM.getExpansionLoc(Ref.RefLocation))) {
+        ExpOffsets.push_back(
+            SM.getDecomposedExpansionLoc(Ref.RefLocation).second);
+      } else {
+        ADD_FAILURE() << Ref.RefLocation.printToString(SM);
+      }
+    }
+  }
+  EXPECT_THAT(RefOffsets, ElementsAreArray(MainFile.points()));
+  EXPECT_THAT(ExpOffsets, ElementsAreArray(MainFile.points("exp")));
+}
+
+// Matches an Include* on the specified line.
+MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; }
+
+TEST(RecordedIncludesTest, Match) {
+  // We're using synthetic data, but need a FileManager to obtain FileEntry*s.
+  // Back it with an in-memory FS so it doesn't do any actual IO.
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  FileManager FM(FileSystemOptions{}, FS);
+  const FileEntry *A = FM.getVirtualFile("/path/a", /*Size=*/0, time_t{});
+  const FileEntry *B = FM.getVirtualFile("/path/b", /*Size=*/0, time_t{});
+
+  RecordedPP::RecordedIncludes Includes;
+  Includes.add(Include{"a", A, SourceLocation(), 1});
+  Includes.add(Include{"a2", A, SourceLocation(), 2});
+  Includes.add(Include{"b", B, SourceLocation(), 3});
+  Includes.add(Include{"vector", B, SourceLocation(), 4});
+  Includes.add(Include{"missing", nullptr, SourceLocation(), 5});
+
+  EXPECT_THAT(Includes.match(A), ElementsAre(line(1), line(2)));
+  EXPECT_THAT(Includes.match(B), ElementsAre(line(3), line(4)));
+  EXPECT_THAT(Includes.match(*tooling::stdlib::Header::named("<vector>")),
+              ElementsAre(line(4)));
+}
+
 } // namespace
 } // namespace clang::include_cleaner
Index: clang-tools-extra/include-cleaner/lib/Types.cpp
===================================================================
--- clang-tools-extra/include-cleaner/lib/Types.cpp
+++ clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -19,6 +19,8 @@
     if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
       return OS << ND->getNameAsString();
     return OS << S.declaration().getDeclKindName();
+  case Symbol::Macro:
+    return OS << S.macro().Name->getName(); // Not the pointer value.
   case Symbol::Standard:
     return OS << S.standard().scope() << S.standard().name();
   }
@@ -35,4 +37,9 @@
   llvm_unreachable("Unhandled Header kind");
 }
 
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Include &I) {
+  return OS << I.Line << ": " << I.Spelled << " => "
+            << (I.Resolved ? I.Resolved->getName() : "<missing>");
+}
+
 } // namespace clang::include_cleaner
Index: clang-tools-extra/include-cleaner/lib/Record.cpp
===================================================================
--- clang-tools-extra/include-cleaner/lib/Record.cpp
+++ clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -11,9 +11,93 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclGroup.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
 
 namespace clang::include_cleaner {
 
+class PPRecorder : public PPCallbacks {
+public:
+  PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
+      : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {}
+
+  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                           SrcMgr::CharacteristicKind FileType,
+                           FileID PrevFID) override {
+    Active = SM.isWrittenInMainFile(Loc);
+  }
+
+  void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
+                          StringRef SpelledFilename, bool IsAngled,
+                          CharSourceRange FilenameRange,
+                          llvm::Optional<FileEntryRef> File,
+                          StringRef SearchPath, StringRef RelativePath,
+                          const Module *, SrcMgr::CharacteristicKind) override {
+    if (!Active)
+      return;
+
+    Include I;
+    I.Location = Hash;
+    I.Resolved = File ? &File->getFileEntry() : nullptr;
+    I.Line = SM.getSpellingLineNumber(Hash);
+    I.Spelled = SpelledFilename;
+    Recorded.Includes.add(I);
+  }
+
+  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    if (!Active)
+      return;
+    recordMacroRef(MacroName, *MD.getMacroInfo());
+  }
+
+  void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
+    if (!Active)
+      return;
+
+    const auto *MI = MD->getMacroInfo();
+    // The tokens of a macro definition could refer to a macro.
+    // Formally this reference isn't resolved until this macro is expanded,
+    // but we want to treat it as a reference anyway.
+    for (const auto &Tok : MI->tokens()) {
+      auto *II = Tok.getIdentifierInfo();
+      // Could this token be a reference to a macro? (Not param to this macro).
+      if (!II || !II->hadMacroDefinition() ||
+          llvm::is_contained(MI->params(), II))
+        continue;
+      if (const MacroInfo *MI = PP.getMacroInfo(II))
+        recordMacroRef(Tok, *MI);
+    }
+  }
+
+  void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
+                      const MacroDirective *) override {
+    if (!Active)
+      return;
+    if (const auto *MI = MD.getMacroInfo())
+      recordMacroRef(MacroName, *MI);
+  }
+
+private:
+  void recordMacroRef(const Token &Tok, const MacroInfo &MI) {
+    if (MI.isBuiltinMacro())
+      return; // __FILE__ is not a reference.
+    Recorded.MacroReferences.push_back(
+        SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
+                        Tok.getLocation()});
+  }
+
+  bool Active = false;
+  RecordedPP &Recorded;
+  const Preprocessor &PP;
+  const SourceManager &SM;
+};
+
+std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
+  return std::make_unique<PPRecorder>(*this, PP);
+}
+
 std::unique_ptr<ASTConsumer> RecordedAST::record() {
   class Recorder : public ASTConsumer {
     RecordedAST *Out;
@@ -36,4 +120,31 @@
   return std::make_unique<Recorder>(this);
 }
 
+void RecordedPP::RecordedIncludes::add(const Include &I) {
+  unsigned Index = All.size();
+  All.push_back(I);
+  auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first;
+  All.back().Spelled = BySpellingIt->first(); // Now we own the backing string.
+
+  BySpellingIt->second.push_back(Index);
+  if (I.Resolved)
+    ByFile[I.Resolved].push_back(Index);
+}
+
+llvm::SmallVector<const Include *>
+RecordedPP::RecordedIncludes::match(Header H) const {
+  llvm::SmallVector<const Include *> Result;
+  switch (H.kind()) {
+  case Header::Physical:
+    for (unsigned I : ByFile.lookup(H.physical()))
+      Result.push_back(&All[I]);
+    break;
+  case Header::Standard:
+    for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>")))
+      Result.push_back(&All[I]);
+    break;
+  }
+  return Result;
+}
+
 } // namespace clang::include_cleaner
Index: clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
===================================================================
--- clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
+++ clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
@@ -19,9 +19,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef CLANG_INCLUDE_CLEANER_RECORD_H
-#define CLANG_INCLUDE_CLEANER_RECORD_H
+#ifndef CLANG_INCLUDE_CLEANER_TYPES_H
+#define CLANG_INCLUDE_CLEANER_TYPES_H
 
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include <memory>
 #include <vector>
@@ -32,34 +33,57 @@
 namespace clang {
 class Decl;
 class FileEntry;
+class IdentifierInfo;
 namespace include_cleaner {
 
+/// We consider a macro to be a different symbol each time it is defined.
+struct Macro {
+  IdentifierInfo *Name;
+  // The location of the Name where the macro is defined.
+  SourceLocation Definition;
+
+  bool operator==(const Macro &S) const {
+    return Name == S.Name && Definition == S.Definition;
+  }
+};
+
 /// An entity that can be referenced in the code.
 struct Symbol {
   enum Kind {
     // A canonical clang declaration.
     Declaration,
+    // A preprocessor macro, as defined in a specific location.
+    Macro,
     // A recognized symbol from the standard library, like std::string.
     Standard,
   };
 
   Symbol(Decl &D) : Storage(&D) {}
+  Symbol(struct Macro M) : Storage(M) {}
   Symbol(tooling::stdlib::Symbol S) : Storage(S) {}
 
   Kind kind() const { return static_cast<Kind>(Storage.index()); }
   bool operator==(const Symbol &RHS) const { return Storage == RHS.Storage; }
 
+  Decl &declaration() const { return *std::get<Declaration>(Storage); }
+  struct Macro macro() const { return std::get<struct Macro>(Storage); }
   tooling::stdlib::Symbol standard() const {
     return std::get<Standard>(Storage);
   }
-  Decl &declaration() const { return *std::get<Declaration>(Storage); }
 
 private:
-  // FIXME: Add support for macros.
-  std::variant<Decl *, tooling::stdlib::Symbol> Storage;
+  std::variant<Decl *, struct Macro, tooling::stdlib::Symbol> Storage;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &);
 
+// Indicates that a piece of code refers to a symbol.
+struct SymbolReference {
+  // The symbol referred to.
+  Symbol Symbol;
+  // The point in the code that refers to the symbol.
+  SourceLocation RefLocation;
+};
+
 /// Represents a file that provides some symbol. Might not be includeable, e.g.
 /// built-in or main-file itself.
 struct Header {
@@ -87,8 +111,16 @@
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Header &);
 
+// A single #include directive written in the main file.
+struct Include {
+  llvm::StringRef Spelled;             // e.g. vector
+  const FileEntry *Resolved = nullptr; // e.g. /path/to/c++/v1/vector
+  SourceLocation Location;             // of hash in #include <vector>
+  unsigned Line = 0;                   // 1-based line number for #include
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &);
+
 } // namespace include_cleaner
 } // namespace clang
 
 #endif
-
Index: clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h
===================================================================
--- clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h
+++ clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h
@@ -17,6 +17,10 @@
 #ifndef CLANG_INCLUDE_CLEANER_RECORD_H
 #define CLANG_INCLUDE_CLEANER_RECORD_H
 
+#include "clang-include-cleaner/Types.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
 #include <memory>
 #include <vector>
 
@@ -24,9 +28,12 @@
 class ASTConsumer;
 class ASTContext;
 class Decl;
+class FileEntry;
+class Preprocessor;
+class PPCallbacks;
 namespace include_cleaner {
 
-// Contains recorded parser events relevant to include-cleaner.
+// Recorded main-file parser events relevant to include-cleaner.
 struct RecordedAST {
   // The consumer (when installed into clang) tracks declarations in this.
   std::unique_ptr<ASTConsumer> record();
@@ -39,6 +46,41 @@
   std::vector<Decl *> Roots;
 };
 
+// Recorded main-file preprocessor events relevant to include-cleaner.
+//
+// This doesn't include facts that we record globally for the whole TU, even
+// when they occur in the main file (e.g. IWYU pragmas).
+struct RecordedPP {
+  // The callback (when installed into clang) tracks macros/includes in this.
+  std::unique_ptr<PPCallbacks> record(const Preprocessor &PP);
+
+  // Describes where macros were used in the main file.
+  std::vector<SymbolReference> MacroReferences;
+
+  // A container for all includes present in the main file.
+  // Supports efficiently hit-testing Headers against Includes.
+  // FIXME: is there a more natural header for this class?
+  class RecordedIncludes {
+  public:
+    void add(const Include &);
+
+    // All #includes seen, in the order they appear.
+    llvm::ArrayRef<Include> all() const { return All; }
+
+    // Determine #includes that match a header (that provides a used symbol).
+    //
+    // Matching is based on the type of Header specified:
+    //  - for a physical file like /path/to/foo.h, we check Resolved
+    //  - for a logical file like <vector>, we check Spelled
+    llvm::SmallVector<const Include *> match(Header H) const;
+
+  private:
+    std::vector<Include> All;
+    llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
+    llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
+  } Includes;
+};
+
 } // namespace include_cleaner
 } // namespace clang
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to