https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/190207
>From 8e6d7ada504e38f138d28e5101d051fbcb3d0e9c Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Fri, 20 Mar 2026 21:09:27 -0700 Subject: [PATCH 1/2] [clang] Store size & mtime in in-memory module cache --- .../clang/Serialization/InMemoryModuleCache.h | 25 +++++++++---- .../include/clang/Serialization/ModuleCache.h | 7 ++-- .../clang/Serialization/ModuleManager.h | 6 ++-- .../InProcessModuleCache.cpp | 5 +-- clang/lib/Frontend/CompilerInstance.cpp | 8 +++-- clang/lib/Serialization/ASTReader.cpp | 4 ++- .../lib/Serialization/InMemoryModuleCache.cpp | 18 +++++++--- clang/lib/Serialization/ModuleCache.cpp | 16 +++++++-- clang/lib/Serialization/ModuleManager.cpp | 33 +++++------------ .../Serialization/InMemoryModuleCacheTest.cpp | 36 +++++++++++-------- 10 files changed, 95 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Serialization/InMemoryModuleCache.h b/clang/include/clang/Serialization/InMemoryModuleCache.h index fc3ba334fc64d..5e3fc19c48ff0 100644 --- a/clang/include/clang/Serialization/InMemoryModuleCache.h +++ b/clang/include/clang/Serialization/InMemoryModuleCache.h @@ -28,16 +28,24 @@ namespace clang { /// each \a ModuleManager sees the same files. class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { struct PCM { + /// The contents of the PCM as produced by \c ASTWriter. std::unique_ptr<llvm::MemoryBuffer> Buffer; + /// The size of this PCM. This may be different from the size of \c Buffer + /// when it's wrapped in an object file. + off_t Size = 0; + + /// The modification time of this PCM. + time_t ModTime = 0; + /// Track whether this PCM is known to be good (either built or /// successfully imported by a CompilerInstance/ASTReader using this /// cache). 
bool IsFinal = false; PCM() = default; - PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer) - : Buffer(std::move(Buffer)) {} + PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer, off_t Size, time_t ModTime) + : Buffer(std::move(Buffer)), Size(Size), ModTime(ModTime) {} }; /// Cache of buffers. @@ -64,7 +72,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \post state is Tentative /// \return a reference to the buffer as a convenience. llvm::MemoryBuffer &addPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime); /// Store a just-built PCM under the Filename. /// @@ -72,7 +81,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \pre state is not Tentative. /// \return a reference to the buffer as a convenience. llvm::MemoryBuffer &addBuiltPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime); /// Try to remove a buffer from the cache. No effect if state is Final. /// @@ -87,8 +97,11 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \post state is Final. void finalizePCM(llvm::StringRef Filename); - /// Get a pointer to the pCM if it exists; else nullptr. - llvm::MemoryBuffer *lookupPCM(llvm::StringRef Filename) const; + /// Get a pointer to the PCM if it exists and set \c Size and \c ModTime to + /// its on-disk size and modification time. Otherwise, return nullptr and + /// don't change \c Size and \c ModTime. + llvm::MemoryBuffer *lookupPCM(llvm::StringRef Filename, off_t &Size, + time_t &ModTime) const; /// Check whether the PCM is final and has been shown to work. 
/// diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h index 6683511b56a05..0be884133ace8 100644 --- a/clang/include/clang/Serialization/ModuleCache.h +++ b/clang/include/clang/Serialization/ModuleCache.h @@ -58,8 +58,8 @@ class ModuleCache { virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0; /// Write the PCM contents to the given path in the module cache. - virtual std::error_code write(StringRef Path, - llvm::MemoryBufferRef Buffer) = 0; + virtual std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) = 0; virtual Expected<std::unique_ptr<llvm::MemoryBuffer>> read(StringRef FileName, off_t &Size, time_t &ModTime) = 0; @@ -77,7 +77,8 @@ std::shared_ptr<ModuleCache> createCrossProcessModuleCache(); void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter); /// Shared implementation of `ModuleCache::write()`. -std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer); +std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime); /// Shared implementation of `ModuleCache::read()`. Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 1ef9aeee7e1fd..80f43ea922a17 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -73,7 +73,8 @@ class ModuleManager { /// Preprocessor's HeaderSearchInfo containing the module map. const HeaderSearch &HeaderSearchInfo; - /// A lookup of in-memory (virtual file) buffers + /// A lookup of in-memory (virtual file) buffers. + // FIXME: No need to key this by `FileEntry`. 
llvm::DenseMap<const FileEntry *, std::unique_ptr<llvm::MemoryBuffer>> InMemoryBuffers; @@ -182,7 +183,8 @@ class ModuleManager { ModuleFile *lookup(ModuleFileKey Key) const; /// Returns the in-memory (virtual file) buffer with the given name - std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name); + std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name, off_t &Size, + time_t &ModTime); /// Number of modules loaded unsigned size() const { return Chain.size(); } diff --git a/clang/lib/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/DependencyScanning/InProcessModuleCache.cpp index 0565f5eebfe04..6ef20a8806b8c 100644 --- a/clang/lib/DependencyScanning/InProcessModuleCache.cpp +++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp @@ -134,13 +134,14 @@ class InProcessModuleCache : public ModuleCache { return InMemory; } - std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox. auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); // FIXME: This could use an in-memory cache to avoid IO, and only write to // disk at the end of the scan. 
- return writeImpl(Path, Buffer); + return writeImpl(Path, Buffer, Size, ModTime); } Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index a504cde306a35..19ee7a01d7974 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1470,8 +1470,10 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, } } - std::error_code EC = - ImportingInstance.getModuleCache().write(ModuleFileName, *Buffer); + off_t Size; + time_t ModTime; + std::error_code EC = ImportingInstance.getModuleCache().write( + ModuleFileName, *Buffer, Size, ModTime); if (EC) { ImportingInstance.getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_written) @@ -1498,7 +1500,7 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, Buffer = llvm::MemoryBuffer::getMemBufferCopy(ExtractedBuffer); ImportingInstance.getModuleCache().getInMemoryModuleCache().addBuiltPCM( - ModuleFileName, std::move(Buffer)); + ModuleFileName, std::move(Buffer), Size, ModTime); return true; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b211b0d32e1de..45a0feb99f54f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5934,9 +5934,11 @@ bool ASTReader::readASTFileControlBlock( ASTReaderListener &Listener, bool ValidateDiagnosticOptions, unsigned ClientLoadCapabilities) { // Open the AST file. 
+ off_t Size; + time_t ModTime; std::unique_ptr<llvm::MemoryBuffer> OwnedBuffer; llvm::MemoryBuffer *Buffer = - ModCache.getInMemoryModuleCache().lookupPCM(Filename); + ModCache.getInMemoryModuleCache().lookupPCM(Filename, Size, ModTime); if (!Buffer) { // FIXME: We should add the pcm to the InMemoryModuleCache if it could be // read again later, but we do not have the context here to determine if it diff --git a/clang/lib/Serialization/InMemoryModuleCache.cpp b/clang/lib/Serialization/InMemoryModuleCache.cpp index d35fa2a807f4d..dcd6395434c16 100644 --- a/clang/lib/Serialization/InMemoryModuleCache.cpp +++ b/clang/lib/Serialization/InMemoryModuleCache.cpp @@ -23,28 +23,36 @@ InMemoryModuleCache::getPCMState(llvm::StringRef Filename) const { llvm::MemoryBuffer & InMemoryModuleCache::addPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - auto Insertion = PCMs.insert(std::make_pair(Filename, std::move(Buffer))); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime) { + auto Insertion = PCMs.insert( + std::make_pair(Filename, PCM(std::move(Buffer), Size, ModTime))); assert(Insertion.second && "Already has a PCM"); return *Insertion.first->second.Buffer; } llvm::MemoryBuffer & InMemoryModuleCache::addBuiltPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime) { auto &PCM = PCMs[Filename]; assert(!PCM.IsFinal && "Trying to override finalized PCM?"); assert(!PCM.Buffer && "Trying to override tentative PCM?"); PCM.Buffer = std::move(Buffer); + PCM.Size = Size; + PCM.ModTime = ModTime; PCM.IsFinal = true; return *PCM.Buffer; } -llvm::MemoryBuffer * -InMemoryModuleCache::lookupPCM(llvm::StringRef Filename) const { +llvm::MemoryBuffer *InMemoryModuleCache::lookupPCM(llvm::StringRef Filename, + off_t &Size, + time_t &ModTime) const { auto I = PCMs.find(Filename); if (I == PCMs.end()) return nullptr; + Size = I->second.Size; + 
ModTime = I->second.ModTime; return I->second.Buffer.get(); } diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp index e82875f6f2831..36ae19aff466b 100644 --- a/clang/lib/Serialization/ModuleCache.cpp +++ b/clang/lib/Serialization/ModuleCache.cpp @@ -112,7 +112,8 @@ void clang::maybePruneImpl(StringRef Path, time_t PruneInterval, } } -std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { +std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) { StringRef Extension = llvm::sys::path::extension(Path); SmallString<128> ModelPath = StringRef(Path).drop_back(Extension.size()); ModelPath += "-%%%%%%%%"; @@ -134,11 +135,21 @@ std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { return EC; } + llvm::sys::fs::file_status Status; { llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); OS << Buffer.getBuffer(); + // Flush buffered output so the status below reflects the whole file. + OS.flush(); + // Using the status from an open file descriptor ensures this is not racy. + if ((EC = llvm::sys::fs::status(FD, Status))) + return EC; } + Size = Status.getSize(); + ModTime = llvm::sys::toTimeT(Status.getLastModificationTime()); + + // This preserves both size and modification time. if ((EC = llvm::sys::fs::rename(TmpPath, Path))) return EC; @@ -225,11 +236,12 @@ class CrossProcessModuleCache : public ModuleCache { return InMemory; } - std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox. 
auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); - return writeImpl(Path, Buffer); + return writeImpl(Path, Buffer, Size, ModTime); } Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 022e2ef42f635..b7d0ee85bc05e 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -59,11 +59,13 @@ ModuleFile *ModuleManager::lookup(ModuleFileKey Key) const { } std::unique_ptr<llvm::MemoryBuffer> -ModuleManager::lookupBuffer(StringRef Name) { +ModuleManager::lookupBuffer(StringRef Name, off_t &Size, time_t &ModTime) { auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, /*CacheFailure=*/false); if (!Entry) return nullptr; + Size = Entry->getSize(); + ModTime = Entry->getModificationTime(); return std::move(InMemoryBuffers[*Entry]); } @@ -157,32 +159,15 @@ ModuleManager::AddModuleResult ModuleManager::addModule( time_t ModTime = ExpectedModTime; llvm::MemoryBuffer *ModuleBuffer = nullptr; std::unique_ptr<llvm::MemoryBuffer> NewFileBuffer = nullptr; - if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) { + if (std::unique_ptr<llvm::MemoryBuffer> Buffer = + lookupBuffer(FileName, Size, ModTime)) { // The buffer was already provided for us. ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM( - FileName, std::move(Buffer)); + FileName, std::move(Buffer), Size, ModTime); } else if (llvm::MemoryBuffer *Buffer = getModuleCache().getInMemoryModuleCache().lookupPCM( - FileName)) { + FileName, Size, ModTime)) { ModuleBuffer = Buffer; - if (!FileName.getImplicitModuleSuffixLength()) { - // Explicitly-built PCM files maintain consistency via mtime/size - // expectations on their imports. Even if we've previously successfully - // loaded a PCM file and stored it in the in-memory module cache, that - // does not mean its mtime/size matches current importer's expectations. 
- // Get that information so that it can be checked below. - // FIXME: Even though this FileManager access is likely already cached, we - // should store this directly in the in-memory module cache. - OptionalFileEntryRef Entry = - FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true, - /*CacheFailure=*/false); - if (!Entry) { - ErrorStr = "module file not found"; - return Missing; - } - ModTime = Entry->getModificationTime(); - Size = Entry->getSize(); - } } else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM( FileName)) { // Report that the module is out of date, since we tried (and failed) to @@ -246,8 +231,8 @@ ModuleManager::AddModuleResult ModuleManager::addModule( return OutOfDate; if (NewFileBuffer) - getModuleCache().getInMemoryModuleCache().addPCM(FileName, - std::move(NewFileBuffer)); + getModuleCache().getInMemoryModuleCache().addPCM( + FileName, std::move(NewFileBuffer), Size, ModTime); // We're keeping this module. Store it in the map. Module = Modules[*FileKey] = NewModule.get(); diff --git a/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp b/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp index ed5e1538eba74..f0cfa2f8f0c3d 100644 --- a/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp +++ b/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp @@ -39,15 +39,17 @@ TEST(InMemoryModuleCacheTest, addPCM) { auto *RawB = B.get(); InMemoryModuleCache Cache; - EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B), 0, 0)); EXPECT_EQ(InMemoryModuleCache::Tentative, Cache.getPCMState("B")); - EXPECT_EQ(RawB, Cache.lookupPCM("B")); + off_t Size; + time_t ModTime; + EXPECT_EQ(RawB, Cache.lookupPCM("B", Size, ModTime)); EXPECT_FALSE(Cache.isPCMFinal("B")); EXPECT_FALSE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); - EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2)), + 
EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); + EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2), 0, 0), "Trying to override tentative PCM"); #endif } @@ -57,15 +59,17 @@ TEST(InMemoryModuleCacheTest, addBuiltPCM) { auto *RawB = B.get(); InMemoryModuleCache Cache; - EXPECT_EQ(RawB, &Cache.addBuiltPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addBuiltPCM("B", std::move(B), 0, 0)); EXPECT_EQ(InMemoryModuleCache::Final, Cache.getPCMState("B")); - EXPECT_EQ(RawB, Cache.lookupPCM("B")); + off_t Size; + time_t ModTime; + EXPECT_EQ(RawB, Cache.lookupPCM("B", Size, ModTime)); EXPECT_TRUE(Cache.isPCMFinal("B")); EXPECT_FALSE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); - EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2)), + EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); + EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2), 0, 0), "Trying to override finalized PCM"); #endif } @@ -79,27 +83,31 @@ TEST(InMemoryModuleCacheTest, tryToDropPCM) { InMemoryModuleCache Cache; EXPECT_EQ(InMemoryModuleCache::Unknown, Cache.getPCMState("B")); - EXPECT_EQ(RawB1, &Cache.addPCM("B", std::move(B1))); + EXPECT_EQ(RawB1, &Cache.addPCM("B", std::move(B1), 0, 0)); EXPECT_FALSE(Cache.tryToDropPCM("B")); - EXPECT_EQ(nullptr, Cache.lookupPCM("B")); + off_t Size1; + time_t ModTime1; + EXPECT_EQ(nullptr, Cache.lookupPCM("B", Size1, ModTime1)); EXPECT_EQ(InMemoryModuleCache::ToBuild, Cache.getPCMState("B")); EXPECT_FALSE(Cache.isPCMFinal("B")); EXPECT_TRUE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); + EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); EXPECT_DEATH(Cache.tryToDropPCM("B"), "PCM to remove is scheduled to be built"); EXPECT_DEATH(Cache.finalizePCM("B"), "Trying to finalize a dropped PCM"); #endif // Add a new one. 
- EXPECT_EQ(RawB2, &Cache.addBuiltPCM("B", std::move(B2))); + EXPECT_EQ(RawB2, &Cache.addBuiltPCM("B", std::move(B2), 0, 0)); EXPECT_TRUE(Cache.isPCMFinal("B")); // Can try to drop again, but this should error and do nothing. EXPECT_TRUE(Cache.tryToDropPCM("B")); - EXPECT_EQ(RawB2, Cache.lookupPCM("B")); + off_t Size2; + time_t ModTime2; + EXPECT_EQ(RawB2, Cache.lookupPCM("B", Size2, ModTime2)); } TEST(InMemoryModuleCacheTest, finalizePCM) { @@ -108,7 +116,7 @@ TEST(InMemoryModuleCacheTest, finalizePCM) { InMemoryModuleCache Cache; EXPECT_EQ(InMemoryModuleCache::Unknown, Cache.getPCMState("B")); - EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B), 0, 0)); // Call finalize. Cache.finalizePCM("B"); >From 407839708cca837c97e39b108bb8363433453cc0 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Mon, 6 Apr 2026 07:52:14 -0700 Subject: [PATCH 2/2] WIP --- clang/include/clang/Basic/Module.h | 127 ++++++++++++------ clang/include/clang/Serialization/ASTReader.h | 6 - .../clang/Serialization/ModuleManager.h | 13 -- clang/lib/Basic/Module.cpp | 39 +++--- clang/lib/Frontend/ASTUnit.cpp | 5 +- clang/lib/Frontend/ChainedIncludesSource.cpp | 9 +- clang/lib/Serialization/ASTReader.cpp | 19 +-- clang/lib/Serialization/ASTWriter.cpp | 9 +- clang/lib/Serialization/ModuleManager.cpp | 84 +++++------- 9 files changed, 171 insertions(+), 140 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 70668860dadc2..87c167d547762 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -67,26 +67,54 @@ using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>; /// module file path. Because input explicitly-built modules do not change /// during the lifetime of the compiler, inode recycling is not of concern here. class ModuleFileKey { - /// The FileManager entity used for deduplication. 
- const void *Ptr; - /// The path relative to the module cache path for implicit module file, empty - /// for other kinds of module files. - std::string ImplicitModulePathSuffix; + struct Empty { + bool operator==(const Empty &) const { return true; } + }; - friend class ModuleFileName; - friend llvm::DenseMapInfo<ModuleFileKey>; + struct Tombstone { + bool operator==(const Tombstone &) const { return true; } + }; + + struct InMemory { + std::string Identifier; + + bool operator==(const InMemory &Other) const { + return Identifier == Other.Identifier; + } + }; + + struct OnDisk { + const FileEntry *Entry; + + bool operator==(const OnDisk &Other) const { + return Entry == Other.Entry; + } + }; - ModuleFileKey(const void *Ptr) : Ptr(Ptr) {} + struct InModuleCache { + llvm::sys::fs::UniqueID ModuleCacheUID; + std::string RelativePath; - ModuleFileKey(const FileEntry *ModuleFile) : Ptr(ModuleFile) {} + bool operator==(const InModuleCache &Other) const { + return ModuleCacheUID == Other.ModuleCacheUID && + RelativePath == Other.RelativePath; + } + }; + + std::variant<Empty, Tombstone, InMemory, OnDisk, InModuleCache> Location; + + friend class ModuleFileName; + friend llvm::DenseMapInfo<ModuleFileKey>; - ModuleFileKey(const DirectoryEntry *ModuleCacheDir, StringRef PathSuffix) - : Ptr(ModuleCacheDir), ImplicitModulePathSuffix(PathSuffix) {} + ModuleFileKey(Empty) : Location(Empty{}) {} + ModuleFileKey(Tombstone) : Location(Tombstone{}) {} + ModuleFileKey(InMemory IM) : Location(std::move(IM)) {} + ModuleFileKey(OnDisk OD) : Location(std::move(OD)) {} + ModuleFileKey(InModuleCache IMC) : Location(std::move(IMC)) {} public: bool operator==(const ModuleFileKey &Other) const { - return Ptr == Other.Ptr && - ImplicitModulePathSuffix == Other.ImplicitModulePathSuffix; + return Location == Other.Location; } bool operator!=(const ModuleFileKey &Other) const { @@ -100,44 +128,46 @@ class ModuleFileKey { /// path and the module file name with the (optional) context hash. 
For all /// other types of module files, this is just the file system path. class ModuleFileName { +public: + struct InMemory {}; + struct OnDisk {}; + struct InModuleCache { + unsigned ImplicitModuleSuffixLength = 0; + }; + +private: std::string Path; - unsigned ImplicitModuleSuffixLength = 0; + std::variant<InMemory, OnDisk, InModuleCache> Location; public: /// Creates an empty module file name. ModuleFileName() = default; - /// Creates a file name for an explicit module. - static ModuleFileName makeExplicit(std::string Name) { - ModuleFileName File; - File.Path = std::move(Name); - return File; - } - - /// Creates a file name for an explicit module. - static ModuleFileName makeExplicit(StringRef Name) { - return makeExplicit(Name.str()); + static ModuleFileName makeInMemory(StringRef Path) { + ModuleFileName Result; + Result.Path = Path; + Result.Location = InMemory{}; + return Result; } - /// Creates a file name for an implicit module. - static ModuleFileName makeImplicit(std::string Name, unsigned SuffixLength) { - assert(SuffixLength != 0 && "Empty suffix for implicit module file name"); - assert(SuffixLength <= Name.size() && - "Suffix for implicit module file name out-of-bounds"); - ModuleFileName File; - File.Path = std::move(Name); - File.ImplicitModuleSuffixLength = SuffixLength; - return File; + static ModuleFileName makeExplicit(StringRef Path) { + ModuleFileName Result; + Result.Path = Path; + Result.Location = OnDisk{}; + return Result; } - /// Creates a file name for an implicit module. - static ModuleFileName makeImplicit(StringRef Name, unsigned SuffixLength) { - return makeImplicit(Name.str(), SuffixLength); + static ModuleFileName makeImplicit(StringRef Path, unsigned SuffixLength) { + ModuleFileName Result; + Result.Path = Path; + Result.Location = InModuleCache{SuffixLength}; + return Result; } /// Returns the suffix length for an implicit module name, zero otherwise. 
- unsigned getImplicitModuleSuffixLength() const { - return ImplicitModuleSuffixLength; + template <class FnTy> + auto visitLocation(FnTy &&Fn) const { + return std::visit(std::forward<FnTy>(Fn), Location); } /// Returns the plain module file name. @@ -1042,15 +1072,30 @@ class VisibleModuleSet { template <> struct llvm::DenseMapInfo<clang::ModuleFileKey> { static clang::ModuleFileKey getEmptyKey() { - return DenseMapInfo<const void *>::getEmptyKey(); + return clang::ModuleFileKey::Empty{}; } static clang::ModuleFileKey getTombstoneKey() { - return DenseMapInfo<const void *>::getTombstoneKey(); + return clang::ModuleFileKey::Tombstone{}; } static unsigned getHashValue(const clang::ModuleFileKey &Val) { - return hash_combine(Val.Ptr, Val.ImplicitModulePathSuffix); + return std::visit( + makeVisitor( + [](clang::ModuleFileKey::Empty) { return hash_value(0); }, + [](clang::ModuleFileKey::Tombstone) { return hash_value(1); }, + [](const clang::ModuleFileKey::InMemory &IM) { + return hash_combine(2, IM.Identifier); + }, + [](const clang::ModuleFileKey::OnDisk &OD) { + return hash_combine(3, OD.Entry); + }, + [](const clang::ModuleFileKey::InModuleCache &IMC) { + return hash_combine(4, IMC.ModuleCacheUID.getDevice(), + IMC.ModuleCacheUID.getFile(), + IMC.RelativePath); + }), + Val.Location); } static bool isEqual(const clang::ModuleFileKey &LHS, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index e66fd3d1eccb7..97fa04e95a8f6 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1994,12 +1994,6 @@ class ASTReader /// Update the state of Sema after loading some additional modules. void UpdateSema(); - /// Add in-memory (virtual file) buffer. 
- void addInMemoryBuffer(StringRef &FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - ModuleMgr.addInMemoryBuffer(FileName, std::move(Buffer)); - } - /// Finalizes the AST reader's state before writing an AST file to /// disk. /// diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 80f43ea922a17..55d61b8e760e6 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -73,11 +73,6 @@ class ModuleManager { /// Preprocessor's HeaderSearchInfo containing the module map. const HeaderSearch &HeaderSearchInfo; - /// A lookup of in-memory (virtual file) buffers. - // FIXME: No need to key this by `FileEntry`. - llvm::DenseMap<const FileEntry *, std::unique_ptr<llvm::MemoryBuffer>> - InMemoryBuffers; - /// The visitation order. SmallVector<ModuleFile *, 4> VisitOrder; @@ -182,10 +177,6 @@ class ModuleManager { /// Returns the module associated with the given module file key. ModuleFile *lookup(ModuleFileKey Key) const; - /// Returns the in-memory (virtual file) buffer with the given name - std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name, off_t &Size, - time_t &ModTime); - /// Number of modules loaded unsigned size() const { return Chain.size(); } @@ -251,10 +242,6 @@ class ModuleManager { /// Remove the modules starting from First (to the end). void removeModules(ModuleIterator First); - /// Add an in-memory buffer the list of known buffers - void addInMemoryBuffer(StringRef FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer); - /// Set the global module index. 
void setGlobalIndex(GlobalModuleIndex *Index); diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index 81e28e46d36ca..ccc35db618c9b 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IOSandbox.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <functional> @@ -35,21 +36,29 @@ using namespace clang; std::optional<ModuleFileKey> ModuleFileName::makeKey(FileManager &FileMgr) const { - if (ImplicitModuleSuffixLength) { - StringRef ModuleCachePath = - StringRef(Path).drop_back(ImplicitModuleSuffixLength); - StringRef ImplicitModuleSuffix = - StringRef(Path).take_back(ImplicitModuleSuffixLength); - if (auto ModuleCache = FileMgr.getOptionalDirectoryRef( - ModuleCachePath, /*CacheFailure=*/false)) - return ModuleFileKey(*ModuleCache, ImplicitModuleSuffix); - } else { - if (auto ModuleFile = FileMgr.getOptionalFileRef(Path, /*OpenFile=*/true, - /*CacheFailure=*/false)) - return ModuleFileKey(*ModuleFile); - } - - return std::nullopt; + return std::visit(llvm::makeVisitor( + [&](InMemory) -> std::optional<ModuleFileKey> { + return ModuleFileKey(ModuleFileKey::InMemory{Path}); + }, + [&](OnDisk) -> std::optional<ModuleFileKey> { + if (auto ModuleFile = + FileMgr.getOptionalFileRef(Path, /*OpenFile=*/true, + /*CacheFailure=*/false)) + return ModuleFileKey(ModuleFileKey::OnDisk{*ModuleFile}); + return std::nullopt; + }, + [&](const InModuleCache &IMC) -> std::optional<ModuleFileKey> { + StringRef ModuleCachePath = + StringRef(Path).drop_back(IMC.ImplicitModuleSuffixLength); + StringRef ImplicitModuleSuffix = + StringRef(Path).take_back(IMC.ImplicitModuleSuffixLength); + auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); + llvm::sys::fs::file_status Status; + if (!llvm::sys::fs::status(ModuleCachePath, Status)) + return ModuleFileKey(ModuleFileKey::InModuleCache{ 
+ Status.getUniqueID(), ImplicitModuleSuffix.str()}); + return std::nullopt; + }), Location); } Module::Module(ModuleConstructorTag, StringRef Name, diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 05ae1f348f920..e437b07699e85 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -60,6 +60,7 @@ #include "clang/Sema/SemaCodeCompletion.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" +#include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/PCHContainerOperations.h" @@ -828,7 +829,9 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile( if (auto BufRef = TmpFileMgr.getBufferForFile(*FE)) { auto Buf = llvm::MemoryBuffer::getMemBufferCopy( (*BufRef)->getBuffer(), (*BufRef)->getBufferIdentifier()); - AST->Reader->getModuleManager().addInMemoryBuffer("-", std::move(Buf)); + off_t BufSize = Buf->getBufferSize(); + AST->ModCache->getInMemoryModuleCache().addBuiltPCM( + "-", std::move(Buf), BufSize, /*ModTime=*/0); } // Reinstate the provided options that are relevant for reading AST files. 
diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp index aa93785322a5c..7a552a572ca78 100644 --- a/clang/lib/Frontend/ChainedIncludesSource.cpp +++ b/clang/lib/Frontend/ChainedIncludesSource.cpp @@ -22,6 +22,8 @@ #include "clang/Sema/MultiplexExternalSemaSource.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" +#include "clang/Serialization/InMemoryModuleCache.h" +#include "clang/Serialization/ModuleCache.h" #include "llvm/Support/MemoryBuffer.h" using namespace clang; @@ -65,11 +67,12 @@ createASTReader(CompilerInstance &CI, StringRef pchFile, /*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(), /*isysroot=*/"", DisableValidationForModuleKind::PCH); for (unsigned ti = 0; ti < bufNames.size(); ++ti) { - StringRef sr(bufNames[ti]); - Reader->addInMemoryBuffer(sr, std::move(MemBufs[ti])); + off_t MemBufSize = MemBufs[ti]->getBufferSize(); + CI.getModuleCache().getInMemoryModuleCache().addBuiltPCM( + bufNames[ti], std::move(MemBufs[ti]), MemBufSize, /*ModTime=*/0); } Reader->setDeserializationListener(deserialListener); - switch (Reader->ReadAST(ModuleFileName::makeExplicit(pchFile), + switch (Reader->ReadAST(ModuleFileName::makeInMemory(pchFile), serialization::MK_PCH, SourceLocation(), ASTReader::ARR_None)) { case ASTReader::Success: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 45a0feb99f54f..90ff72d570470 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3477,7 +3477,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, off_t StoredSize = 0; time_t StoredModTime = 0; - unsigned ImplicitModuleSuffixLength = 0; + unsigned FileNameKind = 0; ASTFileSignature StoredSignature; ModuleFileName ImportedFile; std::string StoredFile; @@ -3501,7 +3501,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, if (!IsImportingStdCXXModule) { StoredSize = (off_t)Record[Idx++]; StoredModTime = 
(time_t)Record[Idx++]; - ImplicitModuleSuffixLength = (unsigned)Record[Idx++]; + FileNameKind = (unsigned)Record[Idx++]; StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size); StoredSignature = ASTFileSignature::create(SignatureBytes.begin(), @@ -3510,12 +3510,13 @@ ASTReader::ReadControlBlock(ModuleFile &F, StoredFile = ReadPathBlob(BaseDirectoryAsWritten, Record, Idx, Blob); if (ImportedFile.empty()) { - ImportedFile = ImplicitModuleSuffixLength - ? ModuleFileName::makeImplicit( - StoredFile, ImplicitModuleSuffixLength) - : ModuleFileName::makeExplicit(StoredFile); - assert((ImportedKind == MK_ImplicitModule) == - (ImplicitModuleSuffixLength != 0)); + ImportedFile = [&]() { + if (FileNameKind == 0) + return ModuleFileName::makeInMemory(StoredFile); + if (FileNameKind == 1) + return ModuleFileName::makeExplicit(StoredFile); + return ModuleFileName::makeImplicit(StoredFile, FileNameKind); + }(); } else if (!getDiags().isIgnored( diag::warn_module_file_mapping_mismatch, CurrentImportLoc)) { @@ -4619,6 +4620,8 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { Kind == MK_ImplicitModule ? 
ModuleMgr.lookupByModuleName(Name) : ModuleMgr.lookupByFileName(ModuleFileName::makeExplicit(Name))); + if (!OM) + OM = ModuleMgr.lookupByFileName(ModuleFileName::makeInMemory(Name)); if (!OM) { std::string Msg = "refers to unknown module, cannot find "; Msg.append(std::string(Name)); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4b3adce07f10c..3bd111f377275 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1568,7 +1568,7 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Standard C++ mod Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File timestamp - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Implicit suff len + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File name kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File name len Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Strings unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); @@ -1604,7 +1604,12 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { llvm::append_range(Blob, M.Signature); - Record.push_back(M.FileName.getImplicitModuleSuffixLength()); + Record.push_back(M.FileName.visitLocation( + makeVisitor([](ModuleFileName::InMemory) { return 0u; }, + [](ModuleFileName::OnDisk) { return 1u; }, + [](const ModuleFileName::InModuleCache &IMC) { + return IMC.ImplicitModuleSuffixLength; + }))); AddPathBlob(M.FileName, Record, Blob); } diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index b7d0ee85bc05e..8012879132e4b 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -58,17 +58,6 @@ ModuleFile *ModuleManager::lookup(ModuleFileKey Key) const { return 
Modules.lookup(Key); } -std::unique_ptr<llvm::MemoryBuffer> -ModuleManager::lookupBuffer(StringRef Name, off_t &Size, time_t &ModTime) { - auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); - if (!Entry) - return nullptr; - Size = Entry->getSize(); - ModTime = Entry->getModificationTime(); - return std::move(InMemoryBuffers[*Entry]); -} - static bool checkModuleFile(off_t Size, time_t ModTime, off_t ExpectedSize, time_t ExpectedModTime, std::string &ErrorStr) { if (ExpectedSize && ExpectedSize != Size) { @@ -159,14 +148,9 @@ ModuleManager::AddModuleResult ModuleManager::addModule( time_t ModTime = ExpectedModTime; llvm::MemoryBuffer *ModuleBuffer = nullptr; std::unique_ptr<llvm::MemoryBuffer> NewFileBuffer = nullptr; - if (std::unique_ptr<llvm::MemoryBuffer> Buffer = - lookupBuffer(FileName, Size, ModTime)) { - // The buffer was already provided for us. - ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM( - FileName, std::move(Buffer), Size, ModTime); - } else if (llvm::MemoryBuffer *Buffer = - getModuleCache().getInMemoryModuleCache().lookupPCM( - FileName, Size, ModTime)) { + if (llvm::MemoryBuffer *Buffer = + getModuleCache().getInMemoryModuleCache().lookupPCM(FileName, Size, + ModTime)) { ModuleBuffer = Buffer; } else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM( FileName)) { @@ -174,30 +158,36 @@ ModuleManager::AddModuleResult ModuleManager::addModule( // import it earlier. return OutOfDate; } else { - auto Buf = [&]() -> Expected<std::unique_ptr<llvm::MemoryBuffer>> { - // Implicit modules live in the module cache. - if (FileName.getImplicitModuleSuffixLength()) - return ModCache.read(FileName, Size, ModTime); - - // Explicit modules are treated as any other compiler input file, load - // them via FileManager. - Expected<FileEntryRef> Entry = - FileName == StringRef("-") - ? 
FileMgr.getSTDIN() - : FileMgr.getFileRef(FileName, /*OpenFile=*/true, - /*CacheFailure=*/false); - if (!Entry) - return Entry.takeError(); - - Size = Entry->getSize(); - ModTime = Entry->getModificationTime(); - - // RequiresNullTerminator is false because module files don't need it, and - // this allows the file to still be mmapped. - return llvm::errorOrToExpected( - FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/false, - /*RequiresNullTerminator=*/false)); - }(); + auto Buf = FileName.visitLocation(llvm::makeVisitor( + [](ModuleFileName::InMemory) + -> Expected<std::unique_ptr<llvm::MemoryBuffer>> { + llvm_unreachable("In-memory module file name not found"); + }, + [&](ModuleFileName::OnDisk) + -> Expected<std::unique_ptr<llvm::MemoryBuffer>> { + // Explicit modules are treated as any other compiler input file, load + // them via FileManager. + Expected<FileEntryRef> Entry = + FileName == StringRef("-") + ? FileMgr.getSTDIN() + : FileMgr.getFileRef(FileName, /*OpenFile=*/true, + /*CacheFailure=*/false); + if (!Entry) + return Entry.takeError(); + + Size = Entry->getSize(); + ModTime = Entry->getModificationTime(); + + // RequiresNullTerminator is false because module files don't need + // it, and this allows the file to still be mmapped. 
+ return llvm::errorOrToExpected( + FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/false, + /*RequiresNullTerminator=*/false)); + }, + [&](ModuleFileName::InModuleCache) + -> Expected<std::unique_ptr<llvm::MemoryBuffer>> { + return ModCache.read(FileName, Size, ModTime); + })); if (!Buf) { ErrorStr = llvm::toString(Buf.takeError()); @@ -286,14 +276,6 @@ void ModuleManager::removeModules(ModuleIterator First) { Chain.erase(Chain.begin() + (First - begin()), Chain.end()); } -void -ModuleManager::addInMemoryBuffer(StringRef FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - FileEntryRef Entry = - FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0); - InMemoryBuffers[Entry] = std::move(Buffer); -} - std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() { // Fast path: if we have a cached state, use it. if (FirstVisitState) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
