arphaman created this revision.
arphaman added reviewers: Bigcheese, aganea.
Herald added subscribers: tschuett, dexonsmith, jkorous, mgorny.
Herald added a project: clang.

This patch implements the fast dependency scanning mode in `clang-scan-deps`: 
the preprocessing is done on files that are minimized using the dependency 
directives source minimizer.

A shared file system cache is used to ensure that the file system requests and
source minimization are performed only once. The cache assumes that the
underlying filesystem won't change during the course of the scan (or, if it
does, that the change will not affect the output), and its entries can't be
evicted. This means that the service and workers can only be used for a single
run of a dependency scanner, and can't be reused across multiple, incremental
runs. This is something that we'll most likely support in the future, though.
Note that the driver still utilizes the underlying real filesystem.

This patch is also still missing the optimization for fast skipping of excluded
preprocessor (PP) blocks that I mentioned in my EuroLLVM talk.


Repository:
  rC Clang

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===================================================================
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations     The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase &Compilations,
+  DependencyScanningTool(DependencyScanningService &Service,
+                         const tooling::CompilationDatabase &Compilations,
                          SharedStream &OS, SharedStream &Errs)
-      : Compilations(Compilations), OS(OS), Errs(Errs) {}
+      : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt<ScanningMode> ScanMode(
+    "mode",
+    llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+    llvm::cl::values(
+        clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+                   "preprocess-minimized-sources",
+                   "The set of dependencies is computed by preprocessing the "
+                   "source files that were minimized to only include the "
+                   "contents that might affect the dependencies"),
+        clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+                   "The set of dependencies is computed by preprocessing the "
+                   "unmodified source files")),
+    llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt<unsigned>
     NumThreads("j", llvm::cl::Optional,
                llvm::cl::desc("Number of worker threads to use (default: use "
@@ -135,12 +151,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
       NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
     WorkerTools.push_back(llvm::make_unique<DependencyScanningTool>(
-        *AdjustingCompilations, DependencyOS, Errs));
+        Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector<std::thread> WorkerThreads;
   std::atomic<bool> HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===================================================================
--- clang/test/ClangScanDeps/regular_cdb.cpp
+++ clang/test/ClangScanDeps/regular_cdb.cpp
@@ -8,7 +8,9 @@
 // RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb
 //
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \
 // RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
 //
 // Make sure we didn't produce any dependency files!
@@ -20,9 +22,13 @@
 // as it might fail if the results for `regular_cdb.cpp` are reported before
 // `regular_cdb2.cpp`.
 //
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
 
 #include "header.h"
Index: clang/test/ClangScanDeps/header_stat_before_open.m
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/header_stat_before_open.m
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/header_stat_before_open.m
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp -R %S/Inputs/frameworks %t.dir/Inputs/frameworks
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/header_stat_before_open_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "Framework/Framework.h"
+#include "Framework/PrivateHeader.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: header_stat_before_open.m
+// CHECK-NEXT: Inputs/frameworks/Framework.framework/Headers/Framework.h
+// CHECK-NEXT: Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
Index: clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
@@ -0,0 +1,7 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/vfsoverlay.cpp -IInputs -ivfsoverlay DIR/vfsoverlay.yaml",
+  "file": "DIR/vfsoverlay.cpp"
+}
+]
Index: clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
@@ -0,0 +1,12 @@
+{
+  'version': 0,
+  'roots': [
+    { 'name': 'DIR', 'type': 'directory',
+      'contents': [
+        { 'name': 'not_real.h', 'type': 'file',
+          'external-contents': 'DIR/Inputs/header.h'
+        }
+      ]
+    }
+  ]
+}
Index: clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
@@ -0,0 +1,7 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/header_stat_before_open.m -iframework Inputs/frameworks",
+  "file": "DIR/header_stat_before_open.m"
+}
+]
Index: clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
@@ -0,0 +1,2 @@
+// This comment is stripped when file is opened, so size will change
+#define PRIV 0
Index: clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
@@ -0,0 +1,2 @@
+// This comment is stripped, so size is changed when file is opened
+#define FRAMEWORK 0
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -8,9 +8,11 @@
 
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/Tooling.h"
 
 using namespace clang;
@@ -62,10 +64,12 @@
 /// dependency scanning for the given compiler invocation.
 class DependencyScanningAction : public tooling::ToolAction {
 public:
-  DependencyScanningAction(StringRef WorkingDirectory,
-                           std::string &DependencyFileContents)
+  DependencyScanningAction(
+      StringRef WorkingDirectory, std::string &DependencyFileContents,
+      llvm::IntrusiveRefCntPtr<DependencyScanningFilesystem> DepFS)
       : WorkingDirectory(WorkingDirectory),
-        DependencyFileContents(DependencyFileContents) {}
+        DependencyFileContents(DependencyFileContents),
+        DepFS(std::move(DepFS)) {}
 
   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
                      FileManager *FileMgr,
@@ -74,8 +78,6 @@
     // Create a compiler instance to handle the actual work.
     CompilerInstance Compiler(std::move(PCHContainerOps));
     Compiler.setInvocation(std::move(Invocation));
-    FileMgr->getFileSystemOpts().WorkingDir = WorkingDirectory;
-    Compiler.setFileManager(FileMgr);
 
     // Don't print 'X warnings and Y errors generated'.
     Compiler.getDiagnosticOpts().ShowCarets = false;
@@ -84,6 +86,27 @@
     if (!Compiler.hasDiagnostics())
       return false;
 
+    // Use the dependency scanning optimized file system if we can.
+    if (DepFS) {
+      // FIXME: Purge the symlink entries from the stat cache in the FM.
+      const CompilerInvocation &CI = Compiler.getInvocation();
+      // Add any filenames that were explicitly passed in the build settings and
+      // that might be opened, as we want to ensure we don't run source
+      // minimization on them.
+      DepFS->IgnoredFiles.clear();
+      for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries)
+        DepFS->IgnoredFiles.insert(Entry.Path);
+      for (const auto &Entry : CI.getHeaderSearchOpts().VFSOverlayFiles)
+        DepFS->IgnoredFiles.insert(Entry);
+
+      // Support for virtual file system overlays on top of the caching
+      // filesystem.
+      FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
+          CI, Compiler.getDiagnostics(), DepFS));
+    }
+
+    FileMgr->getFileSystemOpts().WorkingDir = WorkingDirectory;
+    Compiler.setFileManager(FileMgr);
     Compiler.createSourceManager(*FileMgr);
 
     // Create the dependency collector that will collect the produced
@@ -103,7 +126,8 @@
 
     auto Action = llvm::make_unique<PreprocessOnlyAction>();
     const bool Result = Compiler.ExecuteAction(*Action);
-    FileMgr->clearStatCache();
+    if (!DepFS)
+      FileMgr->clearStatCache();
     return Result;
   }
 
@@ -111,16 +135,18 @@
   StringRef WorkingDirectory;
   /// The dependency file will be written to this string.
   std::string &DependencyFileContents;
+  llvm::IntrusiveRefCntPtr<DependencyScanningFilesystem> DepFS;
 };
 
 } // end anonymous namespace
 
-DependencyScanningWorker::DependencyScanningWorker() {
+DependencyScanningWorker::DependencyScanningWorker(
+    DependencyScanningService &Service) {
   DiagOpts = new DiagnosticOptions();
   PCHContainerOps = std::make_shared<PCHContainerOperations>();
-  /// FIXME: Use the shared file system from the service for fast scanning
-  /// mode.
-  WorkerFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+    DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);
 }
 
 llvm::Expected<std::string>
@@ -133,14 +159,17 @@
   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.get());
 
-  WorkerFS->setCurrentWorkingDirectory(WorkingDirectory);
-  tooling::ClangTool Tool(CDB, Input, PCHContainerOps, WorkerFS);
+  RealFS->setCurrentWorkingDirectory(WorkingDirectory);
+  /// Create the tool that uses the underlying file system to ensure that any
+  /// file system requests that are made by the driver do not go through the
+  /// dependency scanning filesystem.
+  tooling::ClangTool Tool(CDB, Input, PCHContainerOps, RealFS);
   Tool.clearArgumentsAdjusters();
   Tool.setRestoreWorkingDir(false);
   Tool.setPrintErrorMessage(false);
   Tool.setDiagnosticConsumer(&DiagPrinter);
   std::string Output;
-  DependencyScanningAction Action(WorkingDirectory, Output);
+  DependencyScanningAction Action(WorkingDirectory, Output, DepFS);
   if (Tool.run(&Action)) {
     return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
                                                llvm::inconvertibleErrorCode());
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
@@ -0,0 +1,16 @@
+//===- DependencyScanningService.cpp - clang-scan-deps service ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace dependencies;
+
+DependencyScanningService::DependencyScanningService(ScanningMode Mode)
+    : Mode(Mode) {}
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -0,0 +1,232 @@
+//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace dependencies;
+
+CachedFileSystemEntry CachedFileSystemEntry::createFileEntry(
+    StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize) {
+  // Load the file and its content from the file system.
+  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
+      FS.openFileForRead(Filename);
+  if (!MaybeFile)
+    return MaybeFile.getError();
+  llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status();
+  if (!Stat)
+    return Stat.getError();
+
+  llvm::vfs::File &F = **MaybeFile;
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer =
+      F.getBuffer(Stat->getName());
+  if (!MaybeBuffer)
+    return MaybeBuffer.getError();
+
+  llvm::SmallString<1024> MinimizedFileContents;
+  unsigned SmallCapacity = MinimizedFileContents.capacity();
+  // Minimize the file down to directives that might affect the dependencies.
+  const auto &Buffer = *MaybeBuffer;
+  SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
+  if (!Minimize || minimizeSourceToDependencyDirectives(
+                       Buffer->getBuffer(), MinimizedFileContents, Tokens)) {
+    // Use the original file unless requested otherwise, or
+    // if the minimization failed.
+    // FIXME: Propagate the diagnostic if desired by the client.
+    CachedFileSystemEntry Result;
+    Result.MaybeStat = std::move(*Stat);
+    Result.Contents.reserve(Buffer->getBufferSize() + 1);
+    Result.Contents.append(Buffer->getBufferStart(), Buffer->getBufferEnd());
+    // Null terminate the contents.
+    Result.Contents.push_back('\0');
+    Result.Contents.pop_back();
+    return Result;
+  }
+
+  CachedFileSystemEntry Result;
+  size_t Size = MinimizedFileContents.size();
+  Result.MaybeStat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(),
+                                       Stat->getLastModificationTime(),
+                                       Stat->getUser(), Stat->getGroup(), Size,
+                                       Stat->getType(), Stat->getPermissions());
+  // The contents produced by the minimizer must be null terminated.
+  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
+         "not null terminated contents");
+  if (MinimizedFileContents.capacity() <= SmallCapacity) {
+    // The move will copy and not preserve the null terminator. Let's do
+    // a manual copy instead.
+    Result.Contents.reserve(MinimizedFileContents.size() + 1);
+    Result.Contents.append(MinimizedFileContents.begin(),
+                           MinimizedFileContents.end());
+    // Null terminate the contents.
+    Result.Contents.push_back('\0');
+    Result.Contents.pop_back();
+  } else {
+    // The move will preserve the null terminator.
+    Result.Contents = std::move(MinimizedFileContents);
+  }
+  return Result;
+}
+
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");
+  auto Result = CachedFileSystemEntry();
+  Result.MaybeStat = std::move(Stat);
+  return Result;
+}
+
+DependencyScanningFilesystemSharedCache::
+    DependencyScanningFilesystemSharedCache() {
+  NumShards = 8;
+  CacheShards = llvm::make_unique<CacheShard[]>(NumShards);
+}
+
+/// Returns a cache entry for the corresponding key.
+///
+/// A new cache entry is created if the key is not in the cache. This is a
+/// thread safe call.
+DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::get(StringRef Key) {
+  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
+  std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
+  auto It =
+      Shard.Cache.try_emplace(Key, std::unique_ptr<SharedFileSystemEntry>());
+  auto &Ptr = It.first->getValue();
+  // Create the actual cache entry if insert succeeded.
+  if (It.second)
+    Ptr = llvm::make_unique<SharedFileSystemEntry>();
+  return *Ptr;
+}
+
+llvm::ErrorOr<llvm::vfs::Status>
+DependencyScanningFilesystem::status(const Twine &Path) {
+  std::string OwnedFilename;
+  StringRef Filename;
+  if (Path.isSingleStringRef()) {
+    Filename = Path.getSingleStringRef();
+  } else {
+    OwnedFilename = Path.str();
+    Filename = OwnedFilename;
+  }
+
+  // Check the local cache first.
+  if (const auto *Entry = getCachedEntry(Filename))
+    return Entry->getStatus();
+
+  // FIXME: Handle PCM/PCH files.
+  // FIXME: Handle module map files.
+
+  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  auto &SharedCacheEntry = SharedCache.get(Filename);
+  const CachedFileSystemEntry *Result;
+  {
+    std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.Lock);
+    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
+
+    if (!CacheEntry.isValid()) {
+      llvm::vfs::FileSystem &FS = getUnderlyingFS();
+      auto MaybeStatus = FS.status(Filename);
+      if (!MaybeStatus)
+        CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
+      else if (MaybeStatus->isDirectory())
+        CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
+            std::move(*MaybeStatus));
+      else
+        CacheEntry = CachedFileSystemEntry::createFileEntry(
+            Filename, FS, !KeepOriginalSource);
+    }
+
+    Result = &CacheEntry;
+  }
+
+  // Store the result in the local cache.
+  setCachedEntry(Filename, Result);
+  return Result->getStatus();
+}
+
+namespace {
+
+/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
+/// this subclass.
+class MinimizedVFSFile final : public llvm::vfs::File {
+public:
+  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
+                   llvm::vfs::Status Stat)
+      : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
+
+  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
+
+  const llvm::MemoryBuffer *getBufferPtr() const { return Buffer.get(); }
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+            bool IsVolatile) override {
+    return std::move(Buffer);
+  }
+
+  std::error_code close() override { return {}; }
+
+private:
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  llvm::vfs::Status Stat;
+};
+
+llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+createFile(const CachedFileSystemEntry *Entry) {
+  llvm::ErrorOr<StringRef> Contents = Entry->getContents();
+  if (!Contents)
+    return Contents.getError();
+  return llvm::make_unique<MinimizedVFSFile>(
+      llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(),
+                                       /*RequiresNullTerminator=*/false),
+      *Entry->getStatus());
+}
+
+} // end anonymous namespace
+
+llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+DependencyScanningFilesystem::openFileForRead(const Twine &Path) {
+  std::string OwnedFilename;
+  StringRef Filename;
+  if (Path.isSingleStringRef()) {
+    Filename = Path.getSingleStringRef();
+  } else {
+    OwnedFilename = Path.str();
+    Filename = OwnedFilename;
+  }
+
+  // Check the local cache first.
+  if (const auto *Entry = getCachedEntry(Filename))
+    return createFile(Entry);
+
+  // FIXME: Handle PCM/PCH files.
+  // FIXME: Handle module map files.
+
+  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  auto &SharedCacheEntry = SharedCache.get(Filename);
+  const CachedFileSystemEntry *Result;
+  {
+    std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.Lock);
+    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
+
+    if (!CacheEntry.isValid()) {
+      CacheEntry = CachedFileSystemEntry::createFileEntry(
+          Filename, getUnderlyingFS(), !KeepOriginalSource);
+    }
+
+    Result = &CacheEntry;
+  }
+
+  // Store the result in the local cache.
+  setCachedEntry(Filename, Result);
+  return createFile(Result);
+}
Index: clang/lib/Tooling/DependencyScanning/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/DependencyScanning/CMakeLists.txt
+++ clang/lib/Tooling/DependencyScanning/CMakeLists.txt
@@ -4,6 +4,8 @@
   )
 
 add_clang_library(clangDependencyScanning
+  DependencyScanningFilesystem.cpp
+  DependencyScanningService.cpp
   DependencyScanningWorker.cpp
 
   DEPENDS
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H
 
 #include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Tooling/CompilationDatabase.h"
@@ -21,6 +22,9 @@
 namespace tooling {
 namespace dependencies {
 
+class DependencyScanningService;
+class DependencyScanningFilesystem;
+
 /// An individual dependency scanning worker that is able to run on its own
 /// thread.
 ///
@@ -29,7 +33,7 @@
 /// using the regular processing run.
 class DependencyScanningWorker {
 public:
-  DependencyScanningWorker();
+  DependencyScanningWorker(DependencyScanningService &Service);
 
   /// Print out the dependency information into a string using the dependency
   /// file format that is specified in the options (-MD is the default) and
@@ -45,10 +49,11 @@
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
 
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS;
   /// The file system that is used by each worker when scanning for
   /// dependencies. This filesystem persists accross multiple compiler
   /// invocations.
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> WorkerFS;
+  llvm::IntrusiveRefCntPtr<DependencyScanningFilesystem> DepFS;
 };
 
 } // end namespace dependencies
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -0,0 +1,55 @@
+//===- DependencyScanningService.h - clang-scan-deps service ===-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
+#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+/// The mode in which the dependency scanner will operate to find the
+/// dependencies.
+enum class ScanningMode {
+  /// This mode is used to compute the dependencies by running the preprocessor
+  /// over
+  /// the unmodified source files.
+  CanonicalPreprocessing,
+
+  /// This mode is used to compute the dependencies by running the preprocessor
+  /// over
+  /// the source files that have been minimized to contents that might affect
+  /// the dependencies.
+  MinimizedSourcePreprocessing
+};
+
+/// The dependency scanning service contains the shared state that is used by
+/// the individual dependency scanning workers.
+class DependencyScanningService {
+public:
+  DependencyScanningService(ScanningMode Mode);
+
+  ScanningMode getMode() const { return Mode; }
+
+  DependencyScanningFilesystemSharedCache &getSharedCache() {
+    return SharedCache;
+  }
+
+private:
+  const ScanningMode Mode;
+  /// The global file system cache.
+  DependencyScanningFilesystemSharedCache SharedCache;
+};
+
+} // end namespace dependencies
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -0,0 +1,163 @@
+//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <mutex>
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+/// An in-memory representation of a file system entity that is of interest to
+/// the dependency scanning filesystem.
+///
+/// It represents one of the following:
+/// - an opened source file with minimized contents and a stat value.
+/// - an opened source file with original contents and a stat value.
+/// - a directory entry with its stat value.
+/// - an error value to represent a file system error.
+/// - a placeholder with an invalid stat indicating a not yet initialized entry.
+class CachedFileSystemEntry {
+public:
+  /// Default constructor creates an entry with an invalid stat.
+  CachedFileSystemEntry() : MaybeStat(llvm::vfs::Status()) {}
+
+  CachedFileSystemEntry(std::error_code Error) : MaybeStat(std::move(Error)) {}
+
+  /// Create an entry that represents an opened source file with minimized or
+  /// original contents.
+  ///
+  /// The filesystem opens the file even for `stat` calls to avoid the issues
+  /// with stat + open of minimized files that might lead to a mismatching
+  /// size of the file. If the file is not minimized, the full file is read
+  /// and copied into memory to ensure that it's not memory mapped to avoid
+  /// running out of file descriptors.
+  static CachedFileSystemEntry createFileEntry(StringRef Filename,
+                                               llvm::vfs::FileSystem &FS,
+                                               bool Minimize = true);
+
+  /// Create an entry that represents a directory on the filesystem.
+  static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status Stat);
+
+  /// \returns True if the entry holds either an error or a known status.
+  bool isValid() const { return !MaybeStat || MaybeStat->isStatusKnown(); }
+
+  /// \returns The error or the file's contents.
+  llvm::ErrorOr<StringRef> getContents() const {
+    if (!MaybeStat)
+      return MaybeStat.getError();
+    assert(!MaybeStat->isDirectory() && "not a file");
+    assert(isValid() && "not initialized");
+    return StringRef(Contents);
+  }
+
+  /// \returns The error or the status of the entry.
+  llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
+    assert(isValid() && "not initialized");
+    return MaybeStat;
+  }
+
+  /// \returns the name of the file.
+  StringRef getName() const {
+    assert(isValid() && "not initialized");
+    return MaybeStat->getName();
+  }
+
+  CachedFileSystemEntry(CachedFileSystemEntry &&) = default;
+  CachedFileSystemEntry &operator=(CachedFileSystemEntry &&) = default;
+
+  CachedFileSystemEntry(const CachedFileSystemEntry &) = delete;
+  CachedFileSystemEntry &operator=(const CachedFileSystemEntry &) = delete;
+
+private:
+  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
+  // Store the contents in a small string to allow a
+  // move from the small string for the minimized contents.
+  llvm::SmallString<0> Contents;
+};
+
+/// This class is a shared cache that caches the 'stat' and 'open' calls to the
+/// underlying real file system.
+///
+/// It is sharded based on the hash of the key to reduce the lock contention for
+/// the worker threads.
+class DependencyScanningFilesystemSharedCache {
+public:
+  /// A \c CachedFileSystemEntry with a lock.
+  struct SharedFileSystemEntry {
+    std::mutex Lock;
+    CachedFileSystemEntry Value;
+  };
+
+  DependencyScanningFilesystemSharedCache();
+
+  /// Returns a cache entry for the corresponding key.
+  ///
+  /// A new cache entry is created if the key is not in the cache. This is a
+  /// thread-safe call.
+  SharedFileSystemEntry &get(StringRef Key);
+
+private:
+  struct CacheShard {
+    std::mutex CacheLock;
+    llvm::StringMap<std::unique_ptr<SharedFileSystemEntry>> Cache;
+  };
+  std::unique_ptr<CacheShard[]> CacheShards;
+  unsigned NumShards;
+};
+
+/// A virtual file system optimized for the dependency discovery.
+///
+/// It is primarily designed to work with source files whose contents were
+/// preprocessed to remove any tokens that are unlikely to affect the dependency
+/// computation.
+class DependencyScanningFilesystem : public llvm::vfs::ProxyFileSystem {
+public:
+  DependencyScanningFilesystem(
+      DependencyScanningFilesystemSharedCache &SharedCache,
+      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
+      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}
+
+  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
+  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+  openFileForRead(const Twine &Path) override;
+
+  /// The set of files that should not be minimized.
+  llvm::StringSet<> IgnoredFiles;
+
+private:
+  void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
+    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
+    (void)IsInserted;
+    assert(IsInserted && "local cache is updated more than once");
+  }
+
+  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
+
+  DependencyScanningFilesystemSharedCache &SharedCache;
+  /// The local cache is used by the worker thread to cache file system queries
+  /// locally instead of querying the global cache every time.
+  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
+};
+
+} // end namespace dependencies
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
Index: clang/include/clang/Basic/FileManager.h
===================================================================
--- clang/include/clang/Basic/FileManager.h
+++ clang/include/clang/Basic/FileManager.h
@@ -223,6 +223,10 @@
 
   llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
 
+  void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
+    this->FS = std::move(FS); // `this->` selects the member, not the param.
+  }
+
   /// Retrieve a file entry for a "virtual" file that acts as
   /// if there were a file with the given name on disk.
   ///
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to