jansvoboda11 updated this revision to Diff 394600.
jansvoboda11 marked 5 inline comments as done.
jansvoboda11 added a comment.

Rebase on top of D115346 <https://reviews.llvm.org/D115346>, apply suggested 
changes to the cache structure and allocation strategy.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114966/new/

https://reviews.llvm.org/D114966

Files:
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -17,7 +17,9 @@
 using namespace dependencies;
 
 llvm::ErrorOr<llvm::vfs::Status>
-CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
+CachedFileContents::read(StringRef Filename, llvm::vfs::FileSystem &FS) {
+  assert(!OriginalAccess.load() && "reading file for the second time");
+
   // Load the file and its content from the file system.
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
       FS.openFileForRead(Filename);
@@ -33,32 +35,39 @@
   if (!MaybeBuffer)
     return MaybeBuffer.getError();
 
-  OriginalContents = std::move(*MaybeBuffer);
+  OriginalStorage = std::move(*MaybeBuffer);
+  OriginalAccess.store(OriginalStorage.get());
   return Stat;
 }
 
-void CachedFileSystemEntry::minimizeFile() {
-  assert(OriginalContents && "minimizing missing contents");
+void CachedFileContents::minimize() {
+  auto *Original = OriginalAccess.load();
+  // FIXME: This should really be an assert. We should make it one once we
+  // tackle the big FIXME before `CachedFileContents::read` call in
+  // `DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry`.
+  if (!Original)
+    return;
+
+  assert(!MinimizedAccess.load() && "minimizing file for the second time");
 
   llvm::SmallString<1024> MinimizedFileContents;
   // Minimize the file down to directives that might affect the dependencies.
   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
-  if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
+  if (minimizeSourceToDependencyDirectives(Original->getBuffer(),
                                            MinimizedFileContents, Tokens)) {
     // FIXME: Propagate the diagnostic if desired by the client.
     // Use the original file if the minimization failed.
-    MinimizedContentsStorage =
-        llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
-    MinimizedContentsAccess.store(MinimizedContentsStorage.get());
+    MinimizedStorage = llvm::MemoryBuffer::getMemBuffer(*Original);
+    MinimizedAccess.store(MinimizedStorage.get());
     return;
   }
 
   // The contents produced by the minimizer must be null terminated.
   assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
          "not null terminated contents");
-  MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+  MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
       std::move(MinimizedFileContents));
-  MinimizedContentsAccess.store(MinimizedContentsStorage.get());
+  MinimizedAccess.store(MinimizedStorage.get());
 
   // Compute the skipped PP ranges that speedup skipping over inactive
   // preprocessor blocks.
@@ -92,11 +101,20 @@
 }
 
 DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
+DependencyScanningFilesystemSharedCache::getEntry(StringRef Key) {
   CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
   std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
-  auto It = Shard.Cache.try_emplace(Key);
-  return It.first->getValue();
+  return Shard.Cache[Key];
+}
+
+CachedFileContents &DependencyScanningFilesystemSharedCache::getFileContents(
+    StringRef Filename, llvm::sys::fs::UniqueID UID) {
+  CacheShard &Shard = CacheShards[llvm::hash_value(Filename) % NumShards];
+  std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
+  CachedFileContents *&Value = Shard.ContentsCache[UID];
+  if (!Value)
+    Value = new (Shard.ContentsAllocator.Allocate()) CachedFileContents;
+  return *Value;
 }
 
 /// Whitelist file extensions that should be minimized, treating no extension as
@@ -142,15 +160,6 @@
   return !NotToBeMinimized.contains(Filename);
 }
 
-void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
-                                 StringRef Filename,
-                                 llvm::vfs::FileSystem &FS) {
-  if (!MaybeStatus || MaybeStatus->isDirectory())
-    MaybeStat = std::move(MaybeStatus);
-  else
-    MaybeStat = initFile(Filename, FS);
-}
-
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     StringRef Filename) {
@@ -163,7 +172,7 @@
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
 
-  auto &SharedCacheEntry = SharedCache.get(Filename);
+  auto &SharedCacheEntry = SharedCache.getEntry(Filename);
   {
     std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
     CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
@@ -176,14 +185,35 @@
         //   files before building them, and then looks for them again. If we
         //   cache the stat failure, it won't see them the second time.
         return MaybeStatus.getError();
-      CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
+      CacheEntry.init(std::move(MaybeStatus));
     }
 
-    // Checking `needsUpdate` verifies the entry represents an opened file.
-    // Only checking `needsMinimization` could lead to minimization of files
-    // that we failed to load (such files don't have `OriginalContents`).
-    if (CacheEntry.needsUpdate(ShouldBeMinimized))
-      CacheEntry.minimizeFile();
+    if (CacheEntry.isReadable()) {
+      auto *Contents = CacheEntry.getOrCreateContents([&]() {
+        return &SharedCache.getFileContents(Filename, CacheEntry.getUniqueID());
+      });
+
+      if (Contents->needsUpdate(ShouldBeMinimized)) {
+        std::lock_guard<std::mutex> ContentLockGuard(Contents->ValueLock);
+
+        if (Contents->needsRead())
+          // FIXME: This read can fail.
+          // In that case, we should probably update `CacheEntry::MaybeStat`.
+          // However, that is concurrently read at the start of this function
+          // (`needsUpdate` -> `isReadable`), leading to a data race. If we try
+          // to avoid the data race by returning an `error_code` here (without
+          // updating the entry stat value, we can end up attempting to read the
+          // file again in the future, which breaks the consistency guarantees
+          // we want to provide in presence of FS changes. (We would fail for
+          // the first time and potentially succeed for the second time.) The
+          // correct solution would be to keep an `AttemptedRead` bit, set it to
+          // `true` on the first read and prevent future reads of the same file.
+          Contents->read(CacheEntry.getName(), getUnderlyingFS());
+
+        if (Contents->needsMinimization(ShouldBeMinimized))
+          Contents->minimize();
+      }
+    }
   }
 
   // Store the result in the local cache.
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -22,10 +22,55 @@
 namespace tooling {
 namespace dependencies {
 
+/// Original and minimized contents of a cached file entry. Can be shared
+/// between multiple instances of `CachedFileSystemEntry`.
+struct CachedFileContents {
+  CachedFileContents() : OriginalAccess(nullptr), MinimizedAccess(nullptr) {}
+
+  /// Read the original contents of the file.
+  ///
+  /// The filesystem opens the file even for `stat` calls open to avoid the
+  /// issues with stat + open of minimized files that might lead to a
+  /// mismatching size of the file.
+  llvm::ErrorOr<llvm::vfs::Status> read(StringRef Filename,
+                                        llvm::vfs::FileSystem &FS);
+
+  /// Minimize the original contents.
+  void minimize();
+
+  /// \returns True if the contents needs to be updated.
+  bool needsUpdate(bool ShouldBeMinimized) const {
+    return needsRead() || needsMinimization(ShouldBeMinimized);
+  }
+
+  /// \returns True if the original contents need to be read from the underlying
+  /// filesystem.
+  bool needsRead() const { return !OriginalAccess.load(); }
+
+  /// \returns True if the contents need to be minimized.
+  bool needsMinimization(bool ShouldBeMinimized) const {
+    return ShouldBeMinimized && !MinimizedAccess.load();
+  }
+
+  std::mutex ValueLock;
+
+  /// Owning storage for file contents. Always mutated under locked `ValueLock`.
+  std::unique_ptr<llvm::MemoryBuffer> OriginalStorage;
+  std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+
+  /// Atomic view of the file contents. This prevents data races when multiple
+  /// threads call `needsRead` or `needsMinimization`.
+  std::atomic<llvm::MemoryBuffer *> OriginalAccess;
+  std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
+
+  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+};
+
 /// An in-memory representation of a file system entity that is of interest to
 /// the dependency scanning filesystem.
 ///
 /// It represents one of the following:
+/// - opened file with no contents (yet) and a stat value,
 /// - opened file with original contents and a stat value,
 /// - opened file with original contents, minimized contents and a stat value,
 /// - directory entry with its stat value,
@@ -35,11 +80,12 @@
 public:
   /// Creates an uninitialized entry.
   CachedFileSystemEntry()
-      : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {}
+      : MaybeStat(llvm::vfs::Status()), ContentsAccess(nullptr) {}
 
   /// Initialize the cached file system entry.
-  void init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
-            llvm::vfs::FileSystem &FS);
+  void init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus) {
+    MaybeStat = std::move(MaybeStatus);
+  }
 
   /// Initialize the entry as file with minimized or original contents.
   ///
@@ -66,8 +112,11 @@
       return MaybeStat.getError();
     assert(!MaybeStat->isDirectory() && "not a file");
     assert(isInitialized() && "not initialized");
-    assert(OriginalContents && "not read");
-    return OriginalContents->getBuffer();
+    CachedFileContents *Contents = ContentsAccess.load();
+    assert(Contents && "contents member not initialized");
+    llvm::MemoryBuffer *Buffer = Contents->OriginalAccess.load();
+    assert(Buffer && "not read");
+    return Buffer->getBuffer();
   }
 
   /// \returns The error or the file's minimized contents.
@@ -76,22 +125,35 @@
       return MaybeStat.getError();
     assert(!MaybeStat->isDirectory() && "not a file");
     assert(isInitialized() && "not initialized");
-    llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load();
+    CachedFileContents *Contents = ContentsAccess.load();
+    assert(Contents && "contents member not initialized");
+    llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
     assert(Buffer && "not minimized");
     return Buffer->getBuffer();
   }
 
+  /// Get the contents associated with this entry. If none is present, create
+  /// one with the given factory function. 
+  CachedFileContents *getOrCreateContents(
+      llvm::function_ref<CachedFileContents *()> Create) {
+    CachedFileContents *Contents = ContentsAccess.load();
+    if (!Contents) {
+      Contents = Create();
+      ContentsAccess.store(Contents);
+    }
+    return Contents;
+  }
+
   /// \returns True if this entry represents a file that can be read.
   bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); }
 
   /// \returns True if this cached entry needs to be updated.
   bool needsUpdate(bool ShouldBeMinimized) const {
-    return isReadable() && needsMinimization(ShouldBeMinimized);
-  }
-
-  /// \returns True if the contents of this entry need to be minimized.
-  bool needsMinimization(bool ShouldBeMinimized) const {
-    return ShouldBeMinimized && !MinimizedContentsAccess.load();
+    if (isReadable()) {
+      CachedFileContents *Contents = ContentsAccess.load();
+      return !Contents || Contents->needsUpdate(ShouldBeMinimized);
+    }
+    return false;
   }
 
   /// \returns The error or the status of the entry.
@@ -106,23 +168,27 @@
     return MaybeStat->getName();
   }
 
+  /// \returns The unique ID of the entry.
+  llvm::sys::fs::UniqueID getUniqueID() const {
+    assert(isInitialized() && "not initialized");
+    assert(MaybeStat && "not valid directory entry");
+    return MaybeStat->getUniqueID();
+  }
+
   /// Return the mapping between location -> distance that is used to speed up
   /// the block skipping in the preprocessor.
   const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
-    return PPSkippedRangeMapping;
+    CachedFileContents *Contents = std::atomic_load(&ContentsAccess);
+    assert(Contents && "not minimized");
+    return Contents->PPSkippedRangeMapping;
   }
 
 private:
   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
-  std::unique_ptr<llvm::MemoryBuffer> OriginalContents;
-
-  /// Owning storage for the minimized file contents.
-  std::unique_ptr<llvm::MemoryBuffer> MinimizedContentsStorage;
-  /// Atomic view of the minimized file contents.
-  /// This prevents data races when multiple threads call `needsMinimization`.
-  std::atomic<llvm::MemoryBuffer *> MinimizedContentsAccess;
 
-  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+  /// Non-owning pointer to the file contents. This can be shared between
+  /// multiple instances of this class (e.g. between a symlink and its target).
+  std::atomic<CachedFileContents *> ContentsAccess;
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
@@ -140,16 +206,24 @@
 
   DependencyScanningFilesystemSharedCache();
 
-  /// Returns a cache entry for the corresponding key.
-  ///
-  /// A new cache entry is created if the key is not in the cache. This is a
-  /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  /// Returns a cache entry for the corresponding path.
+  /// New cache entry is created if the path is not in the cache. This is
+  /// a thread safe call.
+  SharedFileSystemEntry &getEntry(StringRef Path);
+
+  /// Returns cached contents for the given file.
+  /// Entry is created in the cache and returned in case contents of file with
+  /// the given UID were not read yet. This is a thread safe call.
+  CachedFileContents &getFileContents(StringRef Filename,
+                                      llvm::sys::fs::UniqueID UID);
 
 private:
   struct CacheShard {
     std::mutex CacheLock;
     llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+
+    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsAllocator;
+    llvm::DenseMap<llvm::sys::fs::UniqueID, CachedFileContents *> ContentsCache;
   };
   std::unique_ptr<CacheShard[]> CacheShards;
   unsigned NumShards;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to