friss created this revision. friss added reviewers: jansvoboda11, bnbarham, arphaman. Herald added a subscriber: hiraditya. Herald added a project: All. friss requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits.
Every Clang instance uses an internal FileSystemStatCache to avoid stating the same content multiple times. However, different instances of Clang will contend for filesystem access for their initial stats during HeaderSearch or module validation. On some workloads, the time spent in the kernel in these concurrent stat calls has been measured to be over 20% of the overall compilation time. This is extremely wasteful when most of the stat calls target mostly immutable content like an SDK. This commit introduces a new tool `clang-stat-cache` able to generate an on-disk hash table (see llvm/Support/OnDiskHashTable.h) containing the stat data for a given filesystem hierarchy. The driver part of this has been modeled after -ivfsoverlay given the similarities with what it influences. It introduces a new -ivfsstatcache driver option to instruct Clang to use a stat cache generated by `clang-stat-cache`. These stat caches are inserted at the bottom of the VFS stack (right above the real filesystem). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D136651 Files: clang/include/clang/Basic/DiagnosticFrontendKinds.td clang/include/clang/Driver/Options.td clang/include/clang/Lex/HeaderSearchOptions.h clang/lib/Frontend/CompilerInvocation.cpp clang/test/Driver/vfsstatcache.c clang/test/clang-stat-cache/cache-effects.c clang/test/clang-stat-cache/errors.test clang/tools/CMakeLists.txt clang/tools/clang-stat-cache/CMakeLists.txt clang/tools/clang-stat-cache/clang-stat-cache.cpp llvm/include/llvm/Support/VirtualFileSystem.h llvm/lib/Support/VirtualFileSystem.cpp llvm/unittests/Support/VirtualFileSystemTest.cpp
Index: llvm/unittests/Support/VirtualFileSystemTest.cpp =================================================================== --- llvm/unittests/Support/VirtualFileSystemTest.cpp +++ llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -17,6 +17,7 @@ #include "llvm/Testing/Support/SupportHelpers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include <list> #include <map> #include <string> @@ -3159,3 +3160,300 @@ " DummyFileSystem (RecursiveContents)\n", Output); } + +class StatCacheFileSystemTest : public ::testing::Test { +public: + void SetUp() override {} + + template <typename StringCollection> + void createStatCacheFileSystem( + StringRef OutputFile, StringRef BaseDir, bool IsCaseSensitive, + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result, + StringCollection &Filenames, + IntrusiveRefCntPtr<vfs::FileSystem> Lower = new ErrorDummyFileSystem(), + uint64_t ValidityToken = 0) { + vfs::StatCacheFileSystem::StatCacheWriter Generator( + BaseDir, IsCaseSensitive, ValidityToken); + std::error_code ErrorCode; + + Result.reset(); + + for (sys::fs::recursive_directory_iterator I(BaseDir, ErrorCode), E; + I != E && !ErrorCode; I.increment(ErrorCode)) { + Filenames.push_back(I->path()); + StringRef Path(Filenames.back().c_str()); + sys::fs::file_status s; + status(Path, s); + Generator.addEntry(Path, s); + } + + { + raw_fd_ostream StatCacheFile(OutputFile, ErrorCode); + ASSERT_FALSE(ErrorCode); + Generator.writeStatCache(StatCacheFile); + } + + loadCacheFile(OutputFile, ValidityToken, Lower, Result); + } + + void loadCacheFile(StringRef OutputFile, uint64_t ExpectedValidityToken, + IntrusiveRefCntPtr<vfs::FileSystem> Lower, + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result) { + auto ErrorOrBuffer = MemoryBuffer::getFile(OutputFile); + EXPECT_TRUE(ErrorOrBuffer); + StringRef CacheBaseDir; + bool IsCaseSensitive; + uint64_t FileValidityToken; + auto E = vfs::StatCacheFileSystem::validateCacheFile( + (*ErrorOrBuffer)->getMemBufferRef(), CacheBaseDir, 
IsCaseSensitive, + FileValidityToken); + ASSERT_FALSE(E); + EXPECT_EQ(FileValidityToken, ExpectedValidityToken); + auto ExpectedCache = vfs::StatCacheFileSystem::create( + std::move(*ErrorOrBuffer), OutputFile, Lower); + ASSERT_FALSE(ExpectedCache.takeError()); + Result = *ExpectedCache; + } + + template <typename StringCollection> + void + compareStatCacheToRealFS(IntrusiveRefCntPtr<vfs::StatCacheFileSystem> CacheFS, + const StringCollection &Files) { + IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem(); + + for (auto &File : Files) { + auto ErrorOrStatus1 = RealFS->status(File); + auto ErrorOrStatus2 = CacheFS->status(File); + + EXPECT_EQ((bool)ErrorOrStatus1, (bool)ErrorOrStatus2); + if (!ErrorOrStatus1 || !ErrorOrStatus2) + continue; + + vfs::Status s1 = *ErrorOrStatus1, s2 = *ErrorOrStatus2; + EXPECT_EQ(s1.getName(), s2.getName()); + EXPECT_EQ(s1.getType(), s2.getType()); + EXPECT_EQ(s1.getPermissions(), s2.getPermissions()); + EXPECT_EQ(s1.getLastModificationTime(), s2.getLastModificationTime()); + EXPECT_EQ(s1.getUniqueID(), s2.getUniqueID()); + EXPECT_EQ(s1.getUser(), s2.getUser()); + EXPECT_EQ(s1.getGroup(), s2.getGroup()); + EXPECT_EQ(s1.getSize(), s2.getSize()); + } + } +}; + +TEST_F(StatCacheFileSystemTest, Basic) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + TempFile _ace(TestDirectory.path("a/c/e")); + TempFile _acf(TestDirectory.path("a/c/f"), "", "More dummy contents"); + TempDir _ag(TestDirectory.path("a/g")); + TempFile _agh(TestDirectory.path("a/g/h")); + + StringRef BaseDir(_a.path()); + + SmallVector<std::string, 10> Filenames; + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + 
Filenames); + ASSERT_TRUE(StatCacheFS); + compareStatCacheToRealFS(StatCacheFS, Filenames); +} + +TEST_F(StatCacheFileSystemTest, CaseSensitivity) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + TempDir _b(TestDirectory.path("B")); + TempDir _bc(TestDirectory.path("B/c")); + TempFile _bcd(TestDirectory.path("B/c/D"), "", "Dummy contents"); + + StringRef BaseDir(TestDirectory.path()); + SmallVector<std::string, 10> Filenames; + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + + auto ErrorOrStatus = StatCacheFS->status(_acd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(_bcd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); + EXPECT_FALSE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D")); + EXPECT_FALSE(ErrorOrStatus); + + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ false, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + ErrorOrStatus = StatCacheFS->status(_acd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(_bcd.path()); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d")); + 
EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d")); + EXPECT_TRUE(ErrorOrStatus); + ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D")); + EXPECT_TRUE(ErrorOrStatus); +} + +TEST_F(StatCacheFileSystemTest, DotDot) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempDir _ab(TestDirectory.path("a/b")); + TempFile _abd(TestDirectory.path("a/b/d")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d")); + + StringRef BaseDir(_a.path()); + SmallVector<std::string, 10> Filenames; + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + auto RealFS = vfs::getRealFileSystem(); + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, Filenames, + RealFS); + ASSERT_TRUE(StatCacheFS); + + // Create a file in the cached prefix after the cache was created. + TempFile _abe(TestDirectory.path("a/b/e")); + // Verify the cache is kicking in. + ASSERT_FALSE(StatCacheFS->status(_abe.path())); + // We can access the new file using a ".." because the StatCache will + // just pass that request to the FileSystem below it. 
+ const SmallString<128> PathsToTest[] = { + TestDirectory.path("a/b/../e"), + TestDirectory.path("a/b/../c/d"), + TestDirectory.path("a/b/.."), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} + +#ifdef LLVM_ON_UNIX +TEST_F(StatCacheFileSystemTest, Links) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempLink _ab("d", TestDirectory.path("a/b")); + TempFile _ac(TestDirectory.path("a/c")); + TempDir _ad(TestDirectory.path("a/d")); + TempFile _add(TestDirectory.path("a/d/d"), "", "Dummy contents"); + TempFile _ade(TestDirectory.path("a/d/e")); + TempFile _adf(TestDirectory.path("a/d/f"), "", "More dummy contents"); + TempLink _adg(_ad.path(), TestDirectory.path("a/d/g")); + TempDir _ah(TestDirectory.path("a/h")); + TempLink _ahi(_ad.path(), TestDirectory.path("a/h/i")); + TempLink _ahj("no_such_file", TestDirectory.path("a/h/j")); + + StringRef BaseDir(_a.path()); + + SmallVector<std::string, 10> Filenames; + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/d/g/g")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/b/e")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/h/i/f")), + Filenames.end()); + EXPECT_NE(std::find(Filenames.begin(), Filenames.end(), + TestDirectory.path("a/h/j")), + Filenames.end()); + compareStatCacheToRealFS(StatCacheFS, Filenames); + + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, Filenames, + vfs::getRealFileSystem()); + const SmallString<128> PathsToTest[] = { + TestDirectory.path("a/h/i/../c"), + TestDirectory.path("a/b/../d"), + 
TestDirectory.path("a/g/g/../c"), + TestDirectory.path("a/b/.."), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} +#endif + +TEST_F(StatCacheFileSystemTest, Canonical) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + + StringRef BaseDir(_a.path()); + SmallVector<std::string, 10> Filenames; + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames); + ASSERT_TRUE(StatCacheFS); + + const SmallString<128> PathsToTest[] = { + TestDirectory.path("./a/b"), TestDirectory.path("a//./b"), + TestDirectory.path("a///b"), TestDirectory.path("a//c//d"), + TestDirectory.path("a//c/./d"), TestDirectory.path("a/./././b"), + TestDirectory.path("a/.//.//.//b"), + }; + compareStatCacheToRealFS(StatCacheFS, PathsToTest); +} + +TEST_F(StatCacheFileSystemTest, ValidityToken) { + TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true); + TempDir _a(TestDirectory.path("a")); + TempFile _ab(TestDirectory.path("a/b")); + TempDir _ac(TestDirectory.path("a/c")); + TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents"); + + StringRef BaseDir(_a.path()); + IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS; + { + SmallVector<std::string, 10> Filenames; + uint64_t ValidityToken = 0x1234567890abcfef; + createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir, + /* IsCaseSensitive= */ true, StatCacheFS, + Filenames, new DummyFileSystem(), ValidityToken); + ASSERT_TRUE(StatCacheFS); + } + + uint64_t UpdatedValidityToken = 0xabcdef0123456789; + { + std::error_code EC; + raw_fd_ostream CacheFile(TestDirectory.path("stat.cache"), EC, + sys::fs::CD_OpenAlways); + ASSERT_FALSE(EC); + 
vfs::StatCacheFileSystem::updateValidityToken(CacheFile, + UpdatedValidityToken); + } + + loadCacheFile(TestDirectory.path("stat.cache"), UpdatedValidityToken, + new DummyFileSystem(), StatCacheFS); + EXPECT_TRUE(StatCacheFS); +} Index: llvm/lib/Support/VirtualFileSystem.cpp =================================================================== --- llvm/lib/Support/VirtualFileSystem.cpp +++ llvm/lib/Support/VirtualFileSystem.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileSystem/UniqueID.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/Path.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" @@ -2811,3 +2812,263 @@ return *this; } + +class StatCacheFileSystem::StatCacheLookupInfo { +public: + typedef StringRef external_key_type; + typedef StringRef internal_key_type; + typedef llvm::sys::fs::file_status data_type; + typedef uint32_t hash_value_type; + typedef uint32_t offset_type; + + static bool EqualKey(const internal_key_type &a, const internal_key_type &b) { + return a == b; + } + + static hash_value_type ComputeHash(const internal_key_type &a) { + return llvm::hash_value(a); + } + + static std::pair<unsigned, unsigned> + ReadKeyDataLength(const unsigned char *&d) { + using namespace llvm::support; + unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d); + unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d); + return std::make_pair(KeyLen, DataLen); + } + + static const internal_key_type &GetInternalKey(const external_key_type &x) { + return x; + } + + static const external_key_type &GetExternalKey(const internal_key_type &x) { + return x; + } + + static internal_key_type ReadKey(const unsigned char *d, unsigned n) { + return StringRef((const char *)d, n); + } + + static data_type ReadData(const internal_key_type &k, const unsigned char *d, + unsigned DataLen) { + data_type Result; + memcpy(&Result, d, sizeof(Result)); + 
return Result; + } +}; + +class StatCacheFileSystem::StatCacheGenerationInfo { +public: + typedef StringRef key_type; + typedef const StringRef &key_type_ref; + typedef sys::fs::file_status data_type; + typedef const sys::fs::file_status &data_type_ref; + typedef uint32_t hash_value_type; + typedef uint32_t offset_type; + + /// Calculate the hash for Key + static hash_value_type ComputeHash(key_type_ref Key) { + return static_cast<size_t>(hash_value(Key)); + } + + /// Return the lengths, in bytes, of the given Key/Data pair. + static std::pair<unsigned, unsigned> + EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data) { + using namespace llvm::support; + endian::Writer LE(Out, little); + unsigned KeyLen = Key.size(); + unsigned DataLen = sizeof(Data); + LE.write<uint16_t>(KeyLen); + LE.write<uint16_t>(DataLen); + return std::make_pair(KeyLen, DataLen); + } + + static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen) { + Out.write(Key.data(), KeyLen); + } + + /// Write Data to Out. DataLen is the length from EmitKeyDataLength. + static void EmitData(raw_ostream &Out, key_type_ref Key, data_type_ref Data, + unsigned Len) { + Out.write((const char *)&Data, Len); + } + + static bool EqualKey(key_type_ref Key1, key_type_ref Key2) { + return Key1 == Key2; + } +}; + +StatCacheFileSystem::StatCacheFileSystem( + std::unique_ptr<llvm::MemoryBuffer> &&CacheFile, + IntrusiveRefCntPtr<FileSystem> FS, bool IsCaseSensitive) + : ProxyFileSystem(std::move(FS)), StatCacheFile(std::move(CacheFile)), + IsCaseSensitive(IsCaseSensitive) { + uint32_t BucketOffset; + // BucketOffset is right after the Magic number. See StatCacheHeader below. + memcpy(&BucketOffset, StatCacheFile->getBufferStart() + 4, + sizeof(BucketOffset)); + const char *CacheFileStart = StatCacheFile->getBufferStart(); + // The base directory is at offest 16 and zero-terminated. 
+ StatCachePrefix = StringRef(CacheFileStart + 16); + // HashTableStart points at the beginning of the data emitted by the + // OnDiskHashTable. + const unsigned char *HashTableStart = + (const unsigned char *)CacheFileStart + 16 + StatCachePrefix.size() + 1; + StatCache.reset(StatCacheType::Create( + (const unsigned char *)CacheFileStart + BucketOffset, HashTableStart, + (const unsigned char *)CacheFileStart)); +} + +Expected<IntrusiveRefCntPtr<StatCacheFileSystem>> +StatCacheFileSystem::create(std::unique_ptr<llvm::MemoryBuffer> &&CacheBuffer, + StringRef CacheFilename, + IntrusiveRefCntPtr<FileSystem> FS) { + StringRef BaseDir; + bool IsCaseSensitive; + uint64_t ValidityToken; + if (auto E = validateCacheFile(*CacheBuffer, BaseDir, IsCaseSensitive, + ValidityToken)) + return E; + return new StatCacheFileSystem(std::move(CacheBuffer), FS, IsCaseSensitive); +} + +llvm::ErrorOr<llvm::vfs::Status> +StatCacheFileSystem::status(const Twine &Path) { + SmallString<180> StringPath; + Path.toVector(StringPath); + + // If the cache is not case sensitive, do all operations on lower-cased paths. + if (!IsCaseSensitive) + std::transform(StringPath.begin(), StringPath.end(), StringPath.begin(), + toLower); + + // Canonicalize the path. This removes single dot path components, + // but it also gets rid of repeating separators. + llvm::sys::path::remove_dots(StringPath); + + // Check if the requested path falls into the cache. + StringRef SuffixPath(StringPath); + if (!SuffixPath.consume_front(StatCachePrefix)) + return ProxyFileSystem::status(Path); + + auto It = StatCache->find(SuffixPath); + if (It == StatCache->end()) { + // We didn't find the file in the cache even though it started with the + // cache prefix. It could be that the file doesn't exist, or the spelling + // the pathis different. The canonicalization that the call to remove_dots() + // does leaves only '..' with symlinks as a source of confusion. If the path + // does not contain '..' 
we can safely say it doesn't exist. + if (std::find(sys::path::begin(SuffixPath), sys::path::end(SuffixPath), + "..") == sys::path::end(SuffixPath)) { + return llvm::errc::no_such_file_or_directory; + } + return ProxyFileSystem::status(Path); + } + + // clang-stat-cache will record entries for broken symlnks with a default- + // constructed Status. This will have a default-constructed UinqueID. + if ((*It).getUniqueID() == llvm::sys::fs::UniqueID()) + return llvm::errc::no_such_file_or_directory; + + return llvm::vfs::Status::copyWithNewName(*It, Path); +} + +StatCacheFileSystem::StatCacheWriter::StatCacheWriter(StringRef BaseDir, + bool IsCaseSensitive, + uint64_t ValidityToken) + : BaseDir(IsCaseSensitive ? BaseDir.str() : BaseDir.lower()), + IsCaseSensitive(IsCaseSensitive), ValidityToken(ValidityToken), + Generator(new StatCacheGeneratorType()) {} + +StatCacheFileSystem::StatCacheWriter::~StatCacheWriter() { delete Generator; } + +void StatCacheFileSystem::StatCacheWriter::addEntry( + StringRef Path, const sys::fs::file_status &Status) { + std::string StoredPath; + if (!IsCaseSensitive) { + StoredPath = Path.lower(); + Path = StoredPath; + } + + __attribute__((unused)) bool Consumed = Path.consume_front(BaseDir); + assert(Consumed && "Path does not start with expected prefix."); + + PathStorage.emplace_back(Path.str()); + Generator->insert(PathStorage.back(), Status); +} + +// The format of the stat cache is (pseudo-code): +// struct stat_cache { +// char Magic[4]; // "STAT" or "Stat" +// uint32_t BucketOffset; // See BucketOffset in OnDiskHashTable.h +// uint64_t ValidityToken; // Platofrm specific data allowing to check +// // whether the cache is up-to-date. +// char BaseDir[N]; // Zero terminated path to the base directory +// < OnDiskHashtable Data > // Data for the has table. The keys are the +// // relative paths under BaseDir. The data is +// // llvm::sys::fs::file_status structures. 
+// }; + +namespace { +struct StatCacheHeader { + char Magic[4]; + uint32_t BucketOffset; + uint64_t ValidityToken; + char BaseDir[1]; +}; +} // namespace + +#define MAGIC_CASE_SENSITIVE "Stat" +#define MAGIC_CASE_INSENSITIVE "STAT" + +size_t +StatCacheFileSystem::StatCacheWriter::writeStatCache(raw_fd_ostream &Out) { + // Magic value. + if (IsCaseSensitive) + Out.write(MAGIC_CASE_SENSITIVE, 4); + else + Out.write(MAGIC_CASE_INSENSITIVE, 4); + // Placeholder for BucketOffset, filled in below. + Out.write("\0\0\0\0", 4); + // Write out the validity token. + Out.write((const char *)&ValidityToken, sizeof(ValidityToken)); + // Write out the base directory for the cache. + Out.write(BaseDir.c_str(), BaseDir.size() + 1); + // Write out the hashtable data. + uint32_t BucketOffset = Generator->Emit(Out); + int Size = Out.tell(); + // Move back to right after the Magic to insert BucketOffset + Out.seek(4); + Out.write((const char *)&BucketOffset, sizeof(BucketOffset)); + return Size; +} + +Error StatCacheFileSystem::validateCacheFile(llvm::MemoryBufferRef Buffer, + StringRef &BaseDir, + bool &IsCaseSensitive, + uint64_t &ValidityToken) { + auto *Header = + reinterpret_cast<const StatCacheHeader *>(Buffer.getBufferStart()); + if (Buffer.getBufferSize() < sizeof(StatCacheHeader) || + (memcmp(Header->Magic, MAGIC_CASE_INSENSITIVE, sizeof(Header->Magic)) && + memcmp(Header->Magic, MAGIC_CASE_SENSITIVE, sizeof(Header->Magic))) || + Header->BucketOffset > Buffer.getBufferSize()) + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); + + auto PathLen = strnlen(Header->BaseDir, Buffer.getBufferSize() - 16); + if (Header->BaseDir[PathLen] != 0) + return createStringError(inconvertibleErrorCode(), "Invalid cache file"); + + IsCaseSensitive = Header->Magic[1] == MAGIC_CASE_SENSITIVE[1]; + BaseDir = StringRef(Header->BaseDir, PathLen); + ValidityToken = Header->ValidityToken; + + return ErrorSuccess(); +} + +void 
StatCacheFileSystem::updateValidityToken(raw_fd_ostream &CacheFile, + uint64_t ValidityToken) { + CacheFile.pwrite(reinterpret_cast<char *>(&ValidityToken), + sizeof(ValidityToken), + offsetof(StatCacheHeader, ValidityToken)); +} Index: llvm/include/llvm/Support/VirtualFileSystem.h =================================================================== --- llvm/include/llvm/Support/VirtualFileSystem.h +++ llvm/include/llvm/Support/VirtualFileSystem.h @@ -17,13 +17,13 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Chrono.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" @@ -31,6 +31,7 @@ #include <cassert> #include <cstdint> #include <ctime> +#include <list> #include <memory> #include <stack> #include <string> @@ -40,6 +41,8 @@ namespace llvm { +template <typename T> class OnDiskIterableChainedHashTable; +template <typename T> class OnDiskChainedHashTableGenerator; class MemoryBuffer; class MemoryBufferRef; class Twine; @@ -1106,6 +1109,90 @@ void write(llvm::raw_ostream &OS); }; +/// A ProxyFileSystem using cached information for status() rather than going to +/// the real filesystem. +/// +/// When dealing with a huge tree of (mostly) immutable filesystem content +/// like and SDK, it can be very costly to ask the underlying filesystem for +/// `stat` data. Even when caching the `stat`s internally, having many +/// concurrent Clangs accessing the same tree in a similar way causes +/// contention. 
As SDK files are mostly immutable, we can pre-compute the status +/// information using clang-stat-cache and use that information directly without +/// accessing the real filesystem until Clang needs to open a file. This can +/// speed up module verification and HeaderSearch by significant amounts. +class StatCacheFileSystem : public llvm::vfs::ProxyFileSystem { + class StatCacheLookupInfo; + using StatCacheType = + llvm::OnDiskIterableChainedHashTable<StatCacheLookupInfo>; + + class StatCacheGenerationInfo; + using StatCacheGeneratorType = + llvm::OnDiskChainedHashTableGenerator<StatCacheGenerationInfo>; + + explicit StatCacheFileSystem(std::unique_ptr<llvm::MemoryBuffer> &&CacheFile, + IntrusiveRefCntPtr<FileSystem> FS, + bool IsCaseSensitive); + +public: + /// Create a StatCacheFileSystem from the passed \a CacheBuffer, a + /// MemoryBuffer representing the contents of the \a CacheFilename file. The + /// returned filesystem will be overlaid on top of \a FS. + static Expected<IntrusiveRefCntPtr<StatCacheFileSystem>> + create(std::unique_ptr<llvm::MemoryBuffer> &&CacheBuffer, + StringRef CacheFilename, IntrusiveRefCntPtr<FileSystem> FS); + + /// The status override which will consult the cache if \a Path is in the + /// cached filesystem tree. + llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; + +public: + /// A helper class to generate stat caches. + class StatCacheWriter { + std::string BaseDir; + bool IsCaseSensitive; + uint64_t ValidityToken; + StatCacheGeneratorType *Generator; + std::list<std::string> PathStorage; + + public: + /// Create a StatCacheWriter + /// + /// \param BaseDir The base directory for the path. Every filename passed to + /// addEntry() needs to start with this base directory. + /// \param IsCaseSensitive Whether the cache is case sensitive. 
+ /// \param ValidityToken A 64 bits token that gets embedded in the cache and + /// can be used by generator tools to check for the + /// cache validity in a platform-specific way. + StatCacheWriter(StringRef BaseDir, bool IsCaseSensitive, + uint64_t ValidityToken = 0); + ~StatCacheWriter(); + + /// Add a cache entry storing \a Status for the file at \a Path. + void addEntry(StringRef Path, const sys::fs::file_status &Status); + + /// Write the cache file to \a Out. + size_t writeStatCache(raw_fd_ostream &Out); + }; + +public: + /// Validate that the file content in \a Buffer is a valid stat cache file. + /// \a BaseDir, \a IsCaseSensitive and \a ValidityToken are output parameters + /// that get populated by this call. + static Error validateCacheFile(llvm::MemoryBufferRef Buffer, + StringRef &BaseDir, bool &IsCaseSensitive, + uint64_t &ValidityToken); + + /// Update the ValidityToken data in \a CacheFile. + static void updateValidityToken(raw_fd_ostream &CacheFile, + uint64_t ValidityToken); + +private: + std::unique_ptr<llvm::MemoryBuffer> StatCacheFile; + llvm::StringRef StatCachePrefix; + std::unique_ptr<StatCacheType> StatCache; + bool IsCaseSensitive = true; +}; + } // namespace vfs } // namespace llvm Index: clang/tools/clang-stat-cache/clang-stat-cache.cpp =================================================================== --- /dev/null +++ clang/tools/clang-stat-cache/clang-stat-cache.cpp @@ -0,0 +1,319 @@ +//===- clang-stat-cache.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <list>
+#include <set>
+
+#ifdef __APPLE__
+#include <CoreServices/CoreServices.h>
+
+// <sys/attr.h> and <sys/vnode.h> are Darwin headers that are not available
+// on e.g. Linux, so they must stay behind the platform guard.
+#include <sys/attr.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/vnode.h>
+#endif // __APPLE__
+
+// The clang-stat-cache utility creates an on-disk cache for the stat data
+// of a file-system tree which is expected to be immutable during a build.
+
+using namespace llvm;
+using llvm::vfs::StatCacheFileSystem;
+
+cl::OptionCategory StatCacheCategory("clang-stat-cache options");
+
+cl::opt<std::string> OutputFilename("o", cl::Required,
+                                    cl::desc("Specify output filename"),
+                                    cl::value_desc("filename"),
+                                    cl::cat(StatCacheCategory));
+
+cl::opt<std::string> TargetDirectory(cl::Positional, cl::Required,
+                                     cl::value_desc("dirname"),
+                                     cl::cat(StatCacheCategory));
+
+cl::opt<bool> Verbose("v", cl::desc("More verbose output"),
+                      cl::cat(StatCacheCategory));
+cl::opt<bool> Force("f", cl::desc("Force cache generation"),
+                    cl::cat(StatCacheCategory));
+
+namespace {
+
+#ifdef __APPLE__
+// State shared between checkContentsValidity and FSEventsCallback.
+struct CallbackInfo {
+  // Id of the last event delivered to the callback.
+  FSEventStreamEventId LastEvent = 0;
+  // Set as soon as an event other than the 'history done' marker is seen.
+  bool SeenChanges = false;
+};
+
+// FSEvents callback used by checkContentsValidity. Records the id of every
+// event it receives in the CallbackInfo passed as CtxInfo, flags any real
+// change, and stops the current run loop once the historical events have all
+// been delivered.
+void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo,
+                      size_t numEvents, void *eventPaths,
+                      const FSEventStreamEventFlags *eventFlags,
+                      const FSEventStreamEventId *eventIds) {
+  CallbackInfo *Info = static_cast<CallbackInfo *>(CtxInfo);
+  for (size_t i = 0; i < numEvents; ++i) {
+    Info->LastEvent = eventIds[i];
+    // The kFSEventStreamEventFlagHistoryDone is set on the last 'historical'
+    // event passed to the callback. This means it is passed after the callback
+    // has received all the relevant activity between the StartEvent of the
+    // stream and the point the stream was created.
+    // If the callback didn't see any other event, it means there haven't been
+    // any alterations to the target directory hierarchy and the cache contents
+    // is still up-to-date.
+    if (eventFlags[i] & kFSEventStreamEventFlagHistoryDone) {
+      // Let's stop the main queue and go back to our non-queue code.
+      CFRunLoopStop(CFRunLoopGetCurrent());
+      break;
+    }
+
+    // If we see any event outside of the kFSEventStreamEventFlagHistoryDone
+    // one, there have been changes to the target directory.
+    Info->SeenChanges = true;
+  }
+}
+
+// FSEvents-based check for cache contents validity. We store the latest
+// FSEventStreamEventId in the cache as a ValidityToken and check if any
+// file system events affected the base directory since the cache was
+// generated.
+//
+// On entry ValidityToken is the event id stored in the existing cache; on
+// return it holds the id of the last event that was processed. Returns true
+// when no change was observed (or when the check could not be set up, in
+// which case the cache is optimistically considered up-to-date).
+bool checkContentsValidity(uint64_t &ValidityToken) {
+  // Decode the path as UTF-8 rather than ASCII so that a directory name with
+  // non-ASCII characters does not make the string creation fail.
+  CFStringRef TargetDir = CFStringCreateWithCStringNoCopy(
+      kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingUTF8,
+      kCFAllocatorNull);
+  CFArrayRef PathsToWatch = nullptr;
+  if (TargetDir) {
+    const void *Paths[] = {TargetDir};
+    PathsToWatch =
+        CFArrayCreate(kCFAllocatorDefault, Paths, 1, &kCFTypeArrayCallBacks);
+  }
+
+  // CFRelease must not be called on a null reference, hence the guards.
+  auto ReleasePaths = [&] {
+    if (PathsToWatch)
+      CFRelease(PathsToWatch);
+    if (TargetDir)
+      CFRelease(TargetDir);
+  };
+  auto ConsiderUpToDate = [] {
+    errs() << "Failed to create FS event stream. "
+           << "Considering the cache up-to-date.\n";
+    return true;
+  };
+
+  if (!PathsToWatch) {
+    ReleasePaths();
+    return ConsiderUpToDate();
+  }
+
+  FSEventStreamEventId StartEvent = ValidityToken;
+
+  CallbackInfo Info;
+  // If the callback never reports an event id, keep the token we came with.
+  Info.LastEvent = StartEvent;
+  FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr};
+  CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait.
+
+  // Create the stream, replaying every event since StartEvent.
+  FSEventStreamRef Stream = FSEventStreamCreate(
+      nullptr,
+      &FSEventsCallback,              // Callback function
+      &Ctx, PathsToWatch, StartEvent, // Or a previous event ID
+      Latency, kFSEventStreamCreateFlagNone // Flags explained in reference
+  );
+  if (!Stream) {
+    ReleasePaths();
+    return ConsiderUpToDate();
+  }
+
+  // Associate the stream with the main queue.
+  FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue());
+  // Start the stream (needs the queue to run to do anything).
+  if (!FSEventStreamStart(Stream)) {
+    FSEventStreamInvalidate(Stream);
+    FSEventStreamRelease(Stream);
+    ReleasePaths();
+    return ConsiderUpToDate();
+  }
+
+  // Start the main queue. It will be exited by our callback when it got
+  // confirmed it processed all events.
+  CFRunLoopRun();
+
+  // Tear the stream down: it must be stopped and invalidated before the last
+  // reference to it is released.
+  FSEventStreamStop(Stream);
+  FSEventStreamInvalidate(Stream);
+  FSEventStreamRelease(Stream);
+  ReleasePaths();
+
+  // Update the Validity token with the new last event we processed.
+  ValidityToken = Info.LastEvent;
+  return !Info.SeenChanges;
+}
+
+#else // __APPLE__
+
+// There is no cross-platform way to implement a validity check. If this
+// platform doesn't support it, just consider the cache contents always
+// valid. When that's the case, the tool running cache generation needs
+// to have the knowledge to do it only when needed.
+bool checkContentsValidity(uint64_t &ValidityToken) { return true; }
+
+#endif // __APPLE__
+
+// Populate Generator with the stat cache data for the filesystem tree
+// rooted at BasePath.
+//
+// Returns the error that interrupted the directory walk, or a
+// default-constructed (success) error_code when the whole tree was visited.
+std::error_code
+populateHashTable(StringRef BasePath,
+                  StatCacheFileSystem::StatCacheWriter &Generator) {
+  using namespace llvm;
+  using namespace sys::fs;
+
+  std::error_code ErrorCode;
+
+  // Just loop over the target directory using a recursive iterator.
+  // This invocation follows symlinks, so we are going to potentially
+  // store the status of the same file multiple times with different
+  // names.
+  for (recursive_directory_iterator I(BasePath, ErrorCode), E;
+       I != E && !ErrorCode; I.increment(ErrorCode)) {
+    StringRef Path = I->path();
+    sys::fs::file_status s;
+    // This can fail (broken symlink) and leave the file_status with
+    // its default values. The reader knows this.
+    status(Path, s);
+
+    Generator.addEntry(Path, s);
+  }
+
+  return ErrorCode;
+}
+
+// Inspect the already-opened output file FD and decide whether it holds a
+// stat cache for TargetDirectory that is still up-to-date.
+//
+// Returns true when nothing needs to be regenerated. Returns false when the
+// cache must be (re)written; ValidityToken is then the token to store in the
+// new cache. Exits the process when the file exists but cannot be recognized
+// as a stat cache and -f was not given, so that an unrelated file is never
+// overwritten.
+bool checkCacheValid(int FD, raw_fd_ostream &Out, uint64_t &ValidityToken) {
+  sys::fs::file_status Status;
+  auto EC = sys::fs::status(FD, Status);
+  if (EC) {
+    llvm::errs() << "fstat failed\n";
+    return false;
+  }
+
+  auto Size = Status.getSize();
+  if (Size == 0) {
+    // New file.
+#ifdef __APPLE__
+    // Get the current (global) FSEvent id and use this as ValidityToken.
+    ValidityToken = FSEventsGetCurrentEventId();
+#endif
+    return false;
+  }
+
+  auto ErrorOrBuffer = MemoryBuffer::getOpenFile(FD, OutputFilename, Size);
+  if (!ErrorOrBuffer) {
+    // We could not even read the file, so we cannot tell whether it is a stat
+    // cache. Apply the same policy as for an invalid cache.
+    llvm::errs() << "Failed to read the existing cache file: "
+                 << ErrorOrBuffer.getError().message() << ".";
+    if (!Force) {
+      llvm::errs() << " Aborting.\n";
+      exit(1);
+    }
+
+    llvm::errs() << " Forced update.\n";
+    return false;
+  }
+
+  // Refuse to write to this cache file if it exists but its contents do
+  // not look like a valid cache file.
+  StringRef BaseDir;
+  bool IsCaseSensitive;
+  if (auto E = StatCacheFileSystem::validateCacheFile(
+          (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive,
+          ValidityToken)) {
+    llvm::errs() << "The output cache file exists and is not a valid stat "
+                    "cache.";
+    if (!Force) {
+      llvm::errs() << " Aborting.\n";
+      exit(1);
+    }
+
+    consumeError(std::move(E));
+    llvm::errs() << " Forced update.\n";
+    return false;
+  }
+
+  if (BaseDir != TargetDirectory &&
+      (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) {
+    llvm::errs() << "Existing cache has different directory. Regenerating...\n";
+    return false;
+  }
+
+  // Basic structure checks have passed. Let's see if we can prove that the
+  // cache contents are still valid.
+  bool IsValid = checkContentsValidity(ValidityToken);
+  if (!IsValid || Force)
+    return false; // The caller rewrites the whole file with ValidityToken.
+
+  // The cache is valid, but we might have gotten an updated ValidityToken.
+  // Update the cache with it as clang-stat-cache is just going to exit after
+  // returning from this function.
+  StatCacheFileSystem::updateValidityToken(Out, ValidityToken);
+  return true;
+}
+
+} // namespace
+
+int main(int argc, char *argv[]) {
+  cl::ParseCommandLineOptions(argc, argv);
+
+  // Remove extraneous separators from the end of the basename.
+  while (!TargetDirectory.empty() &&
+         sys::path::is_separator(TargetDirectory.back()))
+    TargetDirectory.pop_back();
+  StringRef Dirname(TargetDirectory);
+
+  std::error_code EC;
+  int FD;
+  EC = sys::fs::openFileForReadWrite(
+      OutputFilename, FD, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::OF_None);
+  if (EC) {
+    llvm::errs() << "Failed to open cache file: "
+                 << toString(llvm::createFileError(OutputFilename, EC)) << "\n";
+    return 1;
+  }
+
+  raw_fd_ostream Out(FD, /* ShouldClose=*/true);
+
+  uint64_t ValidityToken = 0;
+  // Check if the cache is valid and up-to-date.
+  if (checkCacheValid(FD, Out, ValidityToken)) {
+    if (Verbose)
+      outs() << "Cache up-to-date, exiting\n";
+    return 0;
+  }
+
+  if (Verbose)
+    outs() << "Building a stat cache for '" << TargetDirectory << "' into '"
+           << OutputFilename << "'\n";
+
+  // Do not generate a cache for NFS. Iterating huge directory hierarchies
+  // over NFS will be very slow. Better to let the compiler search only the
+  // pieces that it needs than use a cache that takes ages to populate.
+  bool IsLocal;
+  EC = sys::fs::is_local(Dirname, IsLocal);
+  if (EC) {
+    errs() << "Failed to stat the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  if (!IsLocal) {
+    errs() << "Target directory is not a local filesystem. "
+           << "Not populating the cache.\n";
+    return 0;
+  }
+
+  // Check if the filesystem hosting the target directory is case sensitive.
+  bool IsCaseSensitive = true;
+#ifdef _PC_CASE_SENSITIVE
+  IsCaseSensitive =
+      ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1;
+#endif
+  StatCacheFileSystem::StatCacheWriter Generator(Dirname, IsCaseSensitive,
+                                                 ValidityToken);
+
+  // Populate the cache.
+  auto startTime = llvm::TimeRecord::getCurrentTime();
+  std::error_code PopulateEC = populateHashTable(Dirname, Generator);
+  auto endTime = llvm::TimeRecord::getCurrentTime();
+  endTime -= startTime;
+
+  if (PopulateEC) {
+    // The walk stopped early; do not write out a cache that only describes
+    // part of the tree.
+    errs() << "Failed to walk the target directory: " << PopulateEC.message()
+           << "\n";
+    return 1;
+  }
+
+  if (Verbose)
+    errs() << "populateHashTable took: " << endTime.getWallTime() << "s\n";
+
+  // Write the cache to disk.
+  startTime = llvm::TimeRecord::getCurrentTime();
+  // Keep the writer's own return type: narrowing it to int could truncate
+  // the size of a very large cache.
+  auto Size = Generator.writeStatCache(Out);
+  // Make sure everything reached the file before adjusting its size.
+  Out.flush();
+  endTime = llvm::TimeRecord::getCurrentTime();
+  endTime -= startTime;
+
+  if (Verbose)
+    errs() << "writeStatCache took: " << endTime.getWallTime() << "s\n";
+
+  // We might have opened a pre-existing cache which was bigger.
+  if (std::error_code ResizeEC = llvm::sys::fs::resize_file(FD, Size)) {
+    errs() << "Failed to resize the cache file: " << ResizeEC.message()
+           << "\n";
+    return 1;
+  }
+
+  return 0;
+}
Index: clang/tools/clang-stat-cache/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang/tools/clang-stat-cache/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  Support
+  )
+
+add_clang_tool(clang-stat-cache
+  clang-stat-cache.cpp
+  )
+
+if(APPLE)
+set(CLANG_STAT_CACHE_LIB_DEPS
+  "-framework CoreServices"
+  )
+endif()
+
+clang_target_link_libraries(clang-stat-cache
+  PRIVATE
+  ${CLANG_STAT_CACHE_LIB_DEPS}
+  )
Index: clang/tools/CMakeLists.txt
===================================================================
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -16,6 +16,7 @@
   add_clang_subdirectory(clang-repl)
 endif()
 add_clang_subdirectory(clang-cas-test)
+add_clang_subdirectory(clang-stat-cache)
 add_clang_subdirectory(c-index-test)
 add_clang_subdirectory(IndexStore)
Index: clang/test/clang-stat-cache/errors.test
===================================================================
--- /dev/null
+++ clang/test/clang-stat-cache/errors.test
@@ -0,0 +1,42 @@
+RUN: rm -rf %t +RUN: mkdir -p %t + +RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-DIR %s +NO-SUCH-DIR: Failed to stat the target directory: No such file or directory + +RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-FILE %s +NO-SUCH-FILE: Failed to open cache file: '{{.*}}': No such file or directory + +# Use mixed-case directories to exercise the case insensitive implementation. +RUN: mkdir -p %t/Dir +RUN: mkdir -p %t/Dir2 + +# Try to overwrite a few invalid caches +RUN: echo "Not a stat cache" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "Not a stat cache, but bigger than the stat cache header" > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache +RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s + +INVALID-CACHE: The output cache file exists and is not a valid stat cache. Aborting. + +# Test the force flag +RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache +RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE-FORCE %s +INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat cache. Forced update. + +# Generate a valid cache for dir +RUN: rm %t/stat.cache +RUN: clang-stat-cache %t/Dir -o %t/stat.cache +RUN: cp %t/stat.cache %t/stat.cache.save + +# Try with same base directory but with extraneous separators +RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck --check-prefix=EXTRA-SEP %s +EXTRA-SEP-NOT: Existing cache has different directory. Regenerating... 
+EXTRA-SEP: Cache up-to-date, exiting + +# Rewrite the cache with a different base directory +RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck --check-prefix=OTHER-DIR %s +OTHER-DIR: Existing cache has different directory. Regenerating... + Index: clang/test/clang-stat-cache/cache-effects.c =================================================================== --- /dev/null +++ clang/test/clang-stat-cache/cache-effects.c @@ -0,0 +1,63 @@ +#include "foo.h" + +// Testing the effects of a cache is tricky, because it's just supposed to speed +// things up, not change the behavior. In this test, we are using an outdated +// cache to trick HeaderSearch into finding the wrong module and show that it is +// being used. + +// Clear the module cache. +// RUN: rm -rf %t +// RUN: mkdir -p %t/Inputs +// RUN: mkdir -p %t/Inputs/Foo1 +// RUN: mkdir -p %t/Inputs/Foo2 +// RUN: mkdir -p %t/modules-to-compare + +// === +// Create a Foo module in the Foo1 directory. +// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map + +// === +// Compile the module. Note that the compiler has 2 header search paths: +// Foo2 and Foo1 in that order. The module has been created in Foo1, and +// it is the only version available now. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm + +// === +// Create a stat cache for our inputs directory +// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache + +// === +// As a sanity check, re-run the same compilation with the cache and check that +// the module does not change. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s -Rmodule-build 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Now introduce a different Foo module in the Foo2 directory which is before +// Foo1 in the search paths. +// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h +// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map + +// === +// Because we're using the (now-outdated) stat cache, this compilation +// should still be using the first module. It will not see the new one +// which is earlier in the search paths. +// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build -Rmodule-import %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm + +// === +// Regenerate the stat cache for our Inputs directory +// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1 + +// === +// Use the module and now see that we are recompiling the new one. 
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build %s 2>&1 +// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm + +// RUN: not diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm Index: clang/test/Driver/vfsstatcache.c =================================================================== --- /dev/null +++ clang/test/Driver/vfsstatcache.c @@ -0,0 +1,5 @@ +// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s +// CHECK: "-ivfsstatcache" "foo.h" + +// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck -check-prefix=CHECK-MISSING %s +// CHECK-MISSING: stat cache file 'foo.h' not found Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -3276,6 +3276,9 @@ GenerateArg(Args, Opt, P.Prefix, SA); } + for (const std::string &F : Opts.VFSStatCacheFiles) + GenerateArg(Args, OPT_ivfsstatcache, F, SA); + for (const std::string &F : Opts.VFSOverlayFiles) GenerateArg(Args, OPT_ivfsoverlay, F, SA); } @@ -3409,6 +3412,9 @@ Opts.AddSystemHeaderPrefix( A->getValue(), A->getOption().matches(OPT_system_header_prefix)); + for (const auto *A : Args.filtered(OPT_ivfsstatcache)) + Opts.AddVFSStatCacheFile(A->getValue()); + for (const auto *A : Args.filtered(OPT_ivfsoverlay)) Opts.AddVFSOverlayFile(A->getValue()); @@ -5106,6 +5112,23 @@ clang::createVFSFromCompilerInvocation( const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) { + for (const auto &File : CI.getHeaderSearchOpts().VFSStatCacheFiles) { + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer = + llvm::MemoryBuffer::getFile(File); + if (!Buffer) { + 
Diags.Report(diag::err_missing_vfs_stat_cache_file) << File; + continue; + } + + auto StatCache = llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), + File, BaseFS); + + if (errorToBool(StatCache.takeError())) + Diags.Report(diag::err_invalid_vfs_stat_cache) << File; + else + BaseFS = std::move(*StatCache); + } + if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty()) return BaseFS; Index: clang/include/clang/Lex/HeaderSearchOptions.h =================================================================== --- clang/include/clang/Lex/HeaderSearchOptions.h +++ clang/include/clang/Lex/HeaderSearchOptions.h @@ -181,6 +181,9 @@ /// of computing the module hash. llvm::SmallSetVector<llvm::CachedHashString, 16> ModulesIgnoreMacros; + /// The set of user-provided stat cache files. + std::vector<std::string> VFSStatCacheFiles; + /// The set of user-provided virtual filesystem overlay files. std::vector<std::string> VFSOverlayFiles; @@ -250,6 +253,10 @@ SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader); } + void AddVFSStatCacheFile(StringRef Name) { + VFSStatCacheFiles.push_back(std::string(Name)); + } + void AddVFSOverlayFile(StringRef Name) { VFSOverlayFiles.push_back(std::string(Name)); } Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -3370,6 +3370,8 @@ HelpText<"Add directory to SYSTEM include search path, " "absolute paths are relative to -isysroot">, MetaVarName<"<directory>">, Flags<[CC1Option]>; +def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, Group<clang_i_Group>, Flags<[CC1Option]>, + HelpText<"Use the stat data cached in file instead of doing filesystemsyscalls. 
See clang-stat-cache utility.">; def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group<clang_i_Group>, Flags<[CC1Option]>, HelpText<"Overlay the virtual filesystem described by file over the real file system">; def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>; Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -269,6 +269,11 @@ "test module file extension '%0' has different version (%1.%2) than expected " "(%3.%4)">; +def err_missing_vfs_stat_cache_file : Error< + "stat cache file '%0' not found">, DefaultFatal; +def err_invalid_vfs_stat_cache : Error< + "invalid stat cache file '%0'">, DefaultFatal; + def err_missing_vfs_overlay_file : Error< "virtual filesystem overlay file '%0' not found">, DefaultFatal; def err_invalid_vfs_overlay : Error<
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits