https://github.com/Bigcheese updated https://github.com/llvm/llvm-project/pull/132853
>From d19d7c6314dbb68abfe7cdceebd8e8f65aedddc5 Mon Sep 17 00:00:00 2001 From: Michael Spencer <bigchees...@gmail.com> Date: Wed, 29 Jan 2025 12:49:29 -0800 Subject: [PATCH 1/2] [clang][modules] Lazily load by name lookups in module maps Instead of eagerly populating the `clang::ModuleMap` when looking up a module by name, this patch changes `HeaderSearch` to only load the modules that are actually used. This introduces `ModuleMap::findOrLoadModule`, which loads modules from module maps that have been parsed but not yet loaded. It cannot be called from any code that the module loading code itself calls into, as that can cause infinite recursion. Looking up a module by header currently just reparses the module maps. This is fine as redeclarations are allowed from the same file, but future patches will also make looking up a module by header lazy. This patch changes the shadow.m test to use explicitly built modules and `#import`. This test and the shadow feature are very brittle and do not work in general. The test relied on pcm files being left behind by prior failing clang invocations that were then reused by the last invocation. If you clean the cache then the last invocation will always fail. This is because the input module map and the `-fmodule-map-file=` module map are parsed in the same module scope, and `-fmodule-map-file=` is forwarded to implicit module builds. That means you are guaranteed to hit a module redeclaration error if the TU actually imports the module it is trying to shadow. This patch moves the loading of A2's module map to after the `A` module has been loaded, which sets the `IsFromModuleFile` bit on `A`. This means that A2's `A` is skipped entirely instead of creating a shadow module, and we get textual inclusion. It is possible to construct a case where this would happen before this patch too. An upcoming patch in this series will rework shadowing to work in the general case, but that's only possible once header -> module lookup is lazy too. 
--- clang/include/clang/Basic/DiagnosticGroups.td | 1 + .../include/clang/Basic/DiagnosticLexKinds.td | 6 + clang/include/clang/Lex/HeaderSearch.h | 62 +++--- clang/include/clang/Lex/ModuleMap.h | 20 ++ clang/include/clang/Lex/ModuleMapFile.h | 9 + clang/lib/Frontend/CompilerInstance.cpp | 4 +- clang/lib/Lex/HeaderSearch.cpp | 180 +++++++++++++----- clang/lib/Lex/ModuleMap.cpp | 149 +++++++++++++-- clang/lib/Lex/ModuleMapFile.cpp | 3 + clang/lib/Sema/SemaModule.cpp | 2 +- clang/test/Modules/Inputs/shadow/A1/A1.h | 0 .../Modules/Inputs/shadow/A1/module.modulemap | 4 +- clang/test/Modules/Inputs/shadow/A2/A2.h | 0 .../Modules/Inputs/shadow/A2/module.modulemap | 4 +- clang/test/Modules/lazy-by-name-lookup.c | 31 +++ clang/test/Modules/shadow.m | 11 +- 16 files changed, 394 insertions(+), 92 deletions(-) create mode 100644 clang/test/Modules/Inputs/shadow/A1/A1.h create mode 100644 clang/test/Modules/Inputs/shadow/A2/A2.h create mode 100644 clang/test/Modules/lazy-by-name-lookup.c diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b9f08d96151c9..1abb63ba3aea6 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -576,6 +576,7 @@ def ModuleImport : DiagGroup<"module-import">; def ModuleConflict : DiagGroup<"module-conflict">; def ModuleFileExtension : DiagGroup<"module-file-extension">; def ModuleIncludeDirectiveTranslation : DiagGroup<"module-include-translation">; +def ModuleMap : DiagGroup<"module-map">; def RoundTripCC1Args : DiagGroup<"round-trip-cc1-args">; def NewlineEOF : DiagGroup<"newline-eof">; def Nullability : DiagGroup<"nullability">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 912b8bd46e194..a6866ef868dcd 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -836,6 +836,12 @@ def warn_pp_date_time : 
Warning< ShowInSystemHeader, DefaultIgnore, InGroup<DiagGroup<"date-time">>; // Module map parsing +def remark_mmap_parse : Remark< + "parsing modulemap '%0'">, ShowInSystemHeader, InGroup<ModuleMap>; +def remark_mmap_load : Remark< + "loading modulemap '%0'">, ShowInSystemHeader, InGroup<ModuleMap>; +def remark_mmap_load_module : Remark< + "loading parsed module '%0'">, ShowInSystemHeader, InGroup<ModuleMap>; def err_mmap_unknown_token : Error<"skipping stray token">; def err_mmap_expected_module : Error<"expected module declaration">; def err_mmap_expected_module_name : Error<"expected module name">; diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index bccec4dd951d6..d0011fd19eedf 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -332,13 +332,27 @@ class HeaderSearch { /// The mapping between modules and headers. mutable ModuleMap ModMap; + struct ModuleMapDirectoryState { + OptionalFileEntryRef ModuleMapFile; + enum { + Parsed, + Loaded, + Invalid, + } Status; + }; + /// Describes whether a given directory has a module map in it. - llvm::DenseMap<const DirectoryEntry *, bool> DirectoryHasModuleMap; + llvm::DenseMap<const DirectoryEntry *, ModuleMapDirectoryState> + DirectoryModuleMap; /// Set of module map files we've already loaded, and a flag indicating /// whether they were valid or not. llvm::DenseMap<const FileEntry *, bool> LoadedModuleMaps; + /// Set of module map files we've already parsed, and a flag indicating + /// whether they were valid or not. + llvm::DenseMap<const FileEntry *, bool> ParsedModuleMaps; + // A map of discovered headers with their associated include file name. llvm::DenseMap<const FileEntry *, llvm::SmallString<64>> IncludeNames; @@ -433,11 +447,6 @@ class HeaderSearch { /// Retrieve the path to the module cache. 
StringRef getModuleCachePath() const { return ModuleCachePath; } - /// Consider modules when including files from this directory. - void setDirectoryHasModuleMap(const DirectoryEntry* Dir) { - DirectoryHasModuleMap[Dir] = true; - } - /// Forget everything we know about headers so far. void ClearFileInfo() { FileInfo.clear(); @@ -915,26 +924,30 @@ class HeaderSearch { size_t getTotalMemory() const; private: - /// Describes what happened when we tried to load a module map file. - enum LoadModuleMapResult { - /// The module map file had already been loaded. - LMM_AlreadyLoaded, + /// Describes what happened when we tried to load or parse a module map file. + enum ModuleMapResult { + /// The module map file had already been processed. + MMR_AlreadyProcessed, - /// The module map file was loaded by this invocation. - LMM_NewlyLoaded, + /// The module map file was processed by this invocation. + MMR_NewlyProcessed, /// There is was directory with the given name. - LMM_NoDirectory, + MMR_NoDirectory, /// There was either no module map file or the module map file was /// invalid. - LMM_InvalidModuleMap + MMR_InvalidModuleMap }; - LoadModuleMapResult loadModuleMapFileImpl(FileEntryRef File, bool IsSystem, - DirectoryEntryRef Dir, - FileID ID = FileID(), - unsigned *Offset = nullptr); + ModuleMapResult loadModuleMapFileImpl(FileEntryRef File, bool IsSystem, + DirectoryEntryRef Dir, + FileID ID = FileID(), + unsigned *Offset = nullptr); + + ModuleMapResult parseModuleMapFileImpl(FileEntryRef File, bool IsSystem, + DirectoryEntryRef Dir, + FileID ID = FileID()); /// Try to load the module map file in the given directory. /// @@ -945,8 +958,8 @@ class HeaderSearch { /// /// \returns The result of attempting to load the module map file from the /// named directory. 
- LoadModuleMapResult loadModuleMapFile(StringRef DirName, bool IsSystem, - bool IsFramework); + ModuleMapResult loadModuleMapFile(StringRef DirName, bool IsSystem, + bool IsFramework); /// Try to load the module map file in the given directory. /// @@ -956,8 +969,13 @@ class HeaderSearch { /// /// \returns The result of attempting to load the module map file from the /// named directory. - LoadModuleMapResult loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem, - bool IsFramework); + ModuleMapResult loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem, + bool IsFramework); + + ModuleMapResult parseModuleMapFile(StringRef DirName, bool IsSystem, + bool IsFramework); + ModuleMapResult parseModuleMapFile(DirectoryEntryRef Dir, bool IsSystem, + bool IsFramework); }; /// Apply the header search options to get given HeaderSearch object. diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 43c3890631bd1..083dbb485ea4a 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -18,6 +18,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Lex/ModuleMapFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -262,6 +263,18 @@ class ModuleMap { /// Describes whether we haved loaded a particular file as a module /// map. llvm::DenseMap<const FileEntry *, bool> LoadedModuleMap; + llvm::DenseMap<const FileEntry *, const modulemap::ModuleMapFile *> + ParsedModuleMap; + + std::vector<std::unique_ptr<modulemap::ModuleMapFile>> ParsedModuleMaps; + + /// Map from top level module name to a list of ModuleDecls in the order they + /// were discovered. This allows handling shadowing correctly and diagnosing + /// redefinitions. 
+ llvm::StringMap<SmallVector<std::pair<const modulemap::ModuleMapFile *, + const modulemap::ModuleDecl *>, + 1>> + ParsedModules; /// Resolve the given export declaration into an actual export /// declaration. @@ -478,6 +491,8 @@ class ModuleMap { /// \returns The named module, if known; otherwise, returns null. Module *findModule(StringRef Name) const; + Module *findOrLoadModule(StringRef Name); + Module *findOrInferSubmodule(Module *Parent, StringRef Name); /// Retrieve a module with the given name using lexical name lookup, @@ -693,6 +708,11 @@ class ModuleMap { void addHeader(Module *Mod, Module::Header Header, ModuleHeaderRole Role, bool Imported = false); + /// Parse a module map without creating `clang::Module` instances. + bool parseModuleMapFile(FileEntryRef File, bool IsSystem, + DirectoryEntryRef Dir, FileID ID = FileID(), + SourceLocation ExternModuleLoc = SourceLocation()); + /// Load the given module map file, and record any modules we /// encounter. /// diff --git a/clang/include/clang/Lex/ModuleMapFile.h b/clang/include/clang/Lex/ModuleMapFile.h index 1219cc2b50753..7d0e36e9ab86c 100644 --- a/clang/include/clang/Lex/ModuleMapFile.h +++ b/clang/include/clang/Lex/ModuleMapFile.h @@ -133,8 +133,17 @@ using TopLevelDecl = std::variant<ModuleDecl, ExternModuleDecl>; /// This holds many reference types (StringRef, SourceLocation, etc.) whose /// lifetimes are bound by the SourceManager and FileManager used. struct ModuleMapFile { + /// The FileID used to parse this module map. This is always a local ID. + FileID ID; + + /// The directory in which the module map was discovered. Declarations in + /// the module map are relative to this directory. + OptionalDirectoryEntryRef Dir; + /// Beginning of the file, used for moduleMapFileRead callback. 
SourceLocation Start; + + bool IsSystem; std::vector<TopLevelDecl> Decls; void dump(llvm::raw_ostream &out) const; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 4e13b6ced252f..a8c8f37306572 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -579,13 +579,13 @@ struct ReadModuleNames : ASTReaderListener { ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap(); for (const std::string &LoadedModule : LoadedModules) MM.cacheModuleLoad(*PP.getIdentifierInfo(LoadedModule), - MM.findModule(LoadedModule)); + MM.findOrLoadModule(LoadedModule)); LoadedModules.clear(); } void markAllUnavailable() { for (const std::string &LoadedModule : LoadedModules) { - if (Module *M = PP.getHeaderSearchInfo().getModuleMap().findModule( + if (Module *M = PP.getHeaderSearchInfo().getModuleMap().findOrLoadModule( LoadedModule)) { M->HasIncompatibleModuleFile = true; diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 9283a0f4fce55..c33b73a2b1889 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -299,7 +299,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, SourceLocation ImportLoc, bool AllowSearch, bool AllowExtraModuleMapSearch) { // Look in the module map to determine if there is a module by this name. - Module *Module = ModMap.findModule(ModuleName); + Module *Module = ModMap.findOrLoadModule(ModuleName); if (Module || !AllowSearch || !HSOpts.ImplicitModuleMaps) return Module; @@ -359,11 +359,11 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, // checked DirectoryEntryRef NormalDir = *Dir.getDirRef(); // Search for a module map file in this directory. 
- if (loadModuleMapFile(NormalDir, IsSystem, - /*IsFramework*/false) == LMM_NewlyLoaded) { + if (parseModuleMapFile(NormalDir, IsSystem, + /*IsFramework*/ false) == MMR_NewlyProcessed) { // We just loaded a module map file; check whether the module is // available now. - Module = ModMap.findModule(ModuleName); + Module = ModMap.findOrLoadModule(ModuleName); if (Module) break; } @@ -373,10 +373,10 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, SmallString<128> NestedModuleMapDirName; NestedModuleMapDirName = Dir.getDirRef()->getName(); llvm::sys::path::append(NestedModuleMapDirName, ModuleName); - if (loadModuleMapFile(NestedModuleMapDirName, IsSystem, - /*IsFramework*/false) == LMM_NewlyLoaded){ + if (parseModuleMapFile(NestedModuleMapDirName, IsSystem, + /*IsFramework*/ false) == MMR_NewlyProcessed) { // If we just loaded a module map file, look for the module again. - Module = ModMap.findModule(ModuleName); + Module = ModMap.findOrLoadModule(ModuleName); if (Module) break; } @@ -393,7 +393,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, loadSubdirectoryModuleMaps(Dir); // Look again for the module. - Module = ModMap.findModule(ModuleName); + Module = ModMap.findOrLoadModule(ModuleName); if (Module) break; } @@ -1559,7 +1559,7 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, if (!HSOpts.ImplicitModuleMaps) return false; - SmallVector<const DirectoryEntry *, 2> FixUpDirectories; + SmallVector<DirectoryEntryRef, 2> FixUpDirectories; StringRef DirName = FileName; do { @@ -1577,16 +1577,17 @@ bool HeaderSearch::hasModuleMap(StringRef FileName, switch (loadModuleMapFile(*Dir, IsSystem, llvm::sys::path::extension(Dir->getName()) == ".framework")) { - case LMM_NewlyLoaded: - case LMM_AlreadyLoaded: + case MMR_NewlyProcessed: + case MMR_AlreadyProcessed: { // Success. All of the directories we stepped through inherit this module // map file. 
+ const ModuleMapDirectoryState &MMDS = DirectoryModuleMap[*Dir]; for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I) - DirectoryHasModuleMap[FixUpDirectories[I]] = true; + DirectoryModuleMap[FixUpDirectories[I]] = MMDS; return true; - - case LMM_NoDirectory: - case LMM_InvalidModuleMap: + } + case MMR_NoDirectory: + case MMR_InvalidModuleMap: break; } @@ -1706,7 +1707,8 @@ bool HeaderSearch::findUsableModuleForFrameworkHeader( static OptionalFileEntryRef getPrivateModuleMap(FileEntryRef File, FileManager &FileMgr, - DiagnosticsEngine &Diags) { + DiagnosticsEngine &Diags, + bool Diagnose = true) { StringRef Filename = llvm::sys::path::filename(File.getName()); SmallString<128> PrivateFilename(File.getDir().getName()); if (Filename == "module.map") @@ -1717,7 +1719,7 @@ static OptionalFileEntryRef getPrivateModuleMap(FileEntryRef File, return std::nullopt; auto PMMFile = FileMgr.getOptionalFileRef(PrivateFilename); if (PMMFile) { - if (Filename == "module.map") + if (Diagnose && Filename == "module.map") Diags.Report(diag::warn_deprecated_module_dot_map) << PrivateFilename << 1 << File.getDir().getName().ends_with(".framework"); @@ -1762,17 +1764,17 @@ bool HeaderSearch::loadModuleMapFile(FileEntryRef File, bool IsSystem, assert(Dir && "module map home directory must exist"); switch (loadModuleMapFileImpl(File, IsSystem, *Dir, ID, Offset)) { - case LMM_AlreadyLoaded: - case LMM_NewlyLoaded: + case MMR_AlreadyProcessed: + case MMR_NewlyProcessed: return false; - case LMM_NoDirectory: - case LMM_InvalidModuleMap: + case MMR_NoDirectory: + case MMR_InvalidModuleMap: return true; } llvm_unreachable("Unknown load module map result"); } -HeaderSearch::LoadModuleMapResult +HeaderSearch::ModuleMapResult HeaderSearch::loadModuleMapFileImpl(FileEntryRef File, bool IsSystem, DirectoryEntryRef Dir, FileID ID, unsigned *Offset) { @@ -1780,24 +1782,51 @@ HeaderSearch::loadModuleMapFileImpl(FileEntryRef File, bool IsSystem, // loaded in case we recursively try to load 
it from itself. auto AddResult = LoadedModuleMaps.insert(std::make_pair(File, true)); if (!AddResult.second) - return AddResult.first->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap; + return AddResult.first->second ? MMR_AlreadyProcessed : MMR_InvalidModuleMap; if (ModMap.loadModuleMapFile(File, IsSystem, Dir, ID, Offset)) { LoadedModuleMaps[File] = false; - return LMM_InvalidModuleMap; + return MMR_InvalidModuleMap; } // Try to load a corresponding private module map. if (OptionalFileEntryRef PMMFile = - getPrivateModuleMap(File, FileMgr, Diags)) { + getPrivateModuleMap(File, FileMgr, Diags, !ParsedModuleMaps[File])) { if (ModMap.loadModuleMapFile(*PMMFile, IsSystem, Dir)) { LoadedModuleMaps[File] = false; - return LMM_InvalidModuleMap; + return MMR_InvalidModuleMap; + } + } + + // This directory has a module map. + return MMR_NewlyProcessed; +} + +HeaderSearch::ModuleMapResult +HeaderSearch::parseModuleMapFileImpl(FileEntryRef File, bool IsSystem, + DirectoryEntryRef Dir, FileID ID) { + // Check whether we've already parsed this module map, and mark it as being + // parsed in case we recursively try to parse it from itself. + auto AddResult = ParsedModuleMaps.insert(std::make_pair(File, true)); + if (!AddResult.second) + return AddResult.first->second ? MMR_AlreadyProcessed : MMR_InvalidModuleMap; + + if (ModMap.parseModuleMapFile(File, IsSystem, Dir, ID)) { + ParsedModuleMaps[File] = false; + return MMR_InvalidModuleMap; + } + + // Try to parse a corresponding private module map. + if (OptionalFileEntryRef PMMFile = + getPrivateModuleMap(File, FileMgr, Diags)) { + if (ModMap.parseModuleMapFile(*PMMFile, IsSystem, Dir)) { + ParsedModuleMaps[File] = false; + return MMR_InvalidModuleMap; } } // This directory has a module map. - return LMM_NewlyLoaded; + return MMR_NewlyProcessed; } OptionalFileEntryRef @@ -1838,53 +1867,108 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, DirectoryEntryRef Dir, bool IsSystem) { // Try to load a module map file. 
switch (loadModuleMapFile(Dir, IsSystem, /*IsFramework*/true)) { - case LMM_InvalidModuleMap: + case MMR_InvalidModuleMap: // Try to infer a module map from the framework directory. if (HSOpts.ImplicitModuleMaps) ModMap.inferFrameworkModule(Dir, IsSystem, /*Parent=*/nullptr); break; - case LMM_NoDirectory: + case MMR_NoDirectory: return nullptr; - case LMM_AlreadyLoaded: - case LMM_NewlyLoaded: + case MMR_AlreadyProcessed: + case MMR_NewlyProcessed: break; } - return ModMap.findModule(Name); + return ModMap.findOrLoadModule(Name); } -HeaderSearch::LoadModuleMapResult +HeaderSearch::ModuleMapResult HeaderSearch::loadModuleMapFile(StringRef DirName, bool IsSystem, bool IsFramework) { if (auto Dir = FileMgr.getOptionalDirectoryRef(DirName)) return loadModuleMapFile(*Dir, IsSystem, IsFramework); - return LMM_NoDirectory; + return MMR_NoDirectory; } -HeaderSearch::LoadModuleMapResult +HeaderSearch::ModuleMapResult HeaderSearch::loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem, bool IsFramework) { - auto KnownDir = DirectoryHasModuleMap.find(Dir); - if (KnownDir != DirectoryHasModuleMap.end()) - return KnownDir->second ? 
LMM_AlreadyLoaded : LMM_InvalidModuleMap; - - if (OptionalFileEntryRef ModuleMapFile = - lookupModuleMapFile(Dir, IsFramework)) { - LoadModuleMapResult Result = - loadModuleMapFileImpl(*ModuleMapFile, IsSystem, Dir); + auto InsertRes = DirectoryModuleMap.insert(std::pair{ + Dir, ModuleMapDirectoryState{{}, ModuleMapDirectoryState::Invalid}}); + ModuleMapDirectoryState &MMState = InsertRes.first->second; + if (!InsertRes.second) { + switch (MMState.Status) { + case ModuleMapDirectoryState::Parsed: + break; + case ModuleMapDirectoryState::Loaded: + return MMR_AlreadyProcessed; + case ModuleMapDirectoryState::Invalid: + return MMR_InvalidModuleMap; + }; + } + + if (!MMState.ModuleMapFile) + MMState.ModuleMapFile = lookupModuleMapFile(Dir, IsFramework); + + if (MMState.ModuleMapFile) { + ModuleMapResult Result = + loadModuleMapFileImpl(*MMState.ModuleMapFile, IsSystem, Dir); + // Add Dir explicitly in case ModuleMapFile is in a subdirectory. + // E.g. Foo.framework/Modules/module.modulemap + // ^Dir ^ModuleMapFile + if (Result == MMR_NewlyProcessed) + MMState.Status = ModuleMapDirectoryState::Loaded; + else if (Result == MMR_InvalidModuleMap) + MMState.Status = ModuleMapDirectoryState::Invalid; + return Result; + } + return MMR_InvalidModuleMap; +} + +HeaderSearch::ModuleMapResult +HeaderSearch::parseModuleMapFile(StringRef DirName, bool IsSystem, + bool IsFramework) { + if (auto Dir = FileMgr.getOptionalDirectoryRef(DirName)) + return parseModuleMapFile(*Dir, IsSystem, IsFramework); + + return MMR_NoDirectory; +} + +HeaderSearch::ModuleMapResult +HeaderSearch::parseModuleMapFile(DirectoryEntryRef Dir, bool IsSystem, + bool IsFramework) { + auto InsertRes = DirectoryModuleMap.insert(std::pair{ + Dir, ModuleMapDirectoryState{{}, ModuleMapDirectoryState::Invalid}}); + ModuleMapDirectoryState &MMState = InsertRes.first->second; + if (!InsertRes.second) { + switch (MMState.Status) { + case ModuleMapDirectoryState::Parsed: + case ModuleMapDirectoryState::Loaded: + return 
MMR_AlreadyProcessed; + case ModuleMapDirectoryState::Invalid: + return MMR_InvalidModuleMap; + }; + } + + if (!MMState.ModuleMapFile) + MMState.ModuleMapFile = lookupModuleMapFile(Dir, IsFramework); + + if (MMState.ModuleMapFile) { + ModuleMapResult Result = + parseModuleMapFileImpl(*MMState.ModuleMapFile, IsSystem, Dir); // Add Dir explicitly in case ModuleMapFile is in a subdirectory. // E.g. Foo.framework/Modules/module.modulemap // ^Dir ^ModuleMapFile - if (Result == LMM_NewlyLoaded) - DirectoryHasModuleMap[Dir] = true; - else if (Result == LMM_InvalidModuleMap) - DirectoryHasModuleMap[Dir] = false; + if (Result == MMR_NewlyProcessed) + MMState.Status = ModuleMapDirectoryState::Parsed; + else if (Result == MMR_InvalidModuleMap) + MMState.Status = ModuleMapDirectoryState::Invalid; return Result; } - return LMM_InvalidModuleMap; + return MMR_InvalidModuleMap; } void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) { diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index a1394fd3900b0..f116cc145a2ac 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -1051,6 +1051,8 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, bool IsFrameworkDir = Parent.ends_with(".framework"); if (OptionalFileEntryRef ModMapFile = HeaderInfo.lookupModuleMapFile(*ParentDir, IsFrameworkDir)) { + // TODO: Parsing a module map should populate `InferredDirectories` + // so we don't need to do a full load here. 
loadModuleMapFile(*ModMapFile, Attrs.IsSystem, *ParentDir); inferred = InferredDirectories.find(*ParentDir); } @@ -1320,6 +1322,83 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header, Cb->moduleMapAddHeader(HeaderEntry.getName()); } +bool ModuleMap::parseModuleMapFile(FileEntryRef File, bool IsSystem, + DirectoryEntryRef Dir, FileID ID, + SourceLocation ExternModuleLoc) { + llvm::DenseMap<const FileEntry *, const modulemap::ModuleMapFile *>::iterator + Known = ParsedModuleMap.find(File); + if (Known != ParsedModuleMap.end()) + return Known->second == nullptr; + + // If the module map file wasn't already entered, do so now. + if (ID.isInvalid()) { + ID = SourceMgr.translateFile(File); + if (ID.isInvalid() || SourceMgr.isLoadedFileID(ID)) { + auto FileCharacter = + IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap; + ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter); + } + } + + std::optional<llvm::MemoryBufferRef> Buffer = SourceMgr.getBufferOrNone(ID); + if (!Buffer) { + ParsedModuleMap[File] = nullptr; + return true; + } + + Diags.Report(diag::remark_mmap_parse) << File.getName(); + std::optional<modulemap::ModuleMapFile> MaybeMMF = + modulemap::parseModuleMap(ID, Dir, SourceMgr, Diags, IsSystem, nullptr); + + if (!MaybeMMF) { + ParsedModuleMap[File] = nullptr; + return true; + } + + ParsedModuleMaps.push_back( + std::make_unique<modulemap::ModuleMapFile>(std::move(*MaybeMMF))); + const modulemap::ModuleMapFile &MMF = *ParsedModuleMaps.back(); + std::vector<const modulemap::ExternModuleDecl *> PendingExternalModuleMaps; + for (const auto &Decl : MMF.Decls) { + std::visit(llvm::makeVisitor( + [&](const modulemap::ModuleDecl &MD) { + // Only use the first part of the name even for submodules. + // This will correctly load the submodule declarations when + // the module is loaded. 
+ auto &ModuleDecls = + ParsedModules[StringRef(MD.Id.front().first)]; + ModuleDecls.push_back(std::pair(&MMF, &MD)); + }, + [&](const modulemap::ExternModuleDecl &EMD) { + PendingExternalModuleMaps.push_back(&EMD); + }), + Decl); + } + + for (const modulemap::ExternModuleDecl *EMD : PendingExternalModuleMaps) { + StringRef FileNameRef = EMD->Path; + SmallString<128> ModuleMapFileName; + if (llvm::sys::path::is_relative(FileNameRef)) { + ModuleMapFileName += Dir.getName(); + llvm::sys::path::append(ModuleMapFileName, EMD->Path); + FileNameRef = ModuleMapFileName; + } + + if (auto EFile = + SourceMgr.getFileManager().getOptionalFileRef(FileNameRef)) { + parseModuleMapFile(*EFile, IsSystem, EFile->getDir(), FileID(), + ExternModuleLoc); + } + } + + ParsedModuleMap[File] = &MMF; + + for (const auto &Cb : Callbacks) + Cb->moduleMapFileRead(SourceLocation(), File, IsSystem); + + return false; +} + FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const { if (Module->DefinitionLoc.isInvalid()) return {}; @@ -1458,7 +1537,6 @@ bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) { namespace clang { class ModuleMapLoader { - modulemap::ModuleMapFile &MMF; SourceManager &SourceMgr; DiagnosticsEngine &Diags; @@ -1515,13 +1593,15 @@ class ModuleMapLoader { using Attributes = ModuleMap::Attributes; public: - ModuleMapLoader(modulemap::ModuleMapFile &MMF, SourceManager &SourceMgr, - DiagnosticsEngine &Diags, ModuleMap &Map, FileID ModuleMapFID, + ModuleMapLoader(SourceManager &SourceMgr, DiagnosticsEngine &Diags, + ModuleMap &Map, FileID ModuleMapFID, DirectoryEntryRef Directory, bool IsSystem) - : MMF(MMF), SourceMgr(SourceMgr), Diags(Diags), Map(Map), + : SourceMgr(SourceMgr), Diags(Diags), Map(Map), ModuleMapFID(ModuleMapFID), Directory(Directory), IsSystem(IsSystem) {} - bool loadModuleMapFile(); + bool loadModuleDecl(const modulemap::ModuleDecl &MD); + bool loadExternModuleDecl(const modulemap::ExternModuleDecl &EMD); + bool 
loadModuleMapFile(const modulemap::ModuleMapFile &MMF); }; } // namespace clang @@ -1660,7 +1740,11 @@ void ModuleMapLoader::handleModuleDecl(const modulemap::ModuleDecl &MD) { Map.LangOpts.CurrentModule == ModuleName && SourceMgr.getDecomposedLoc(ModuleNameLoc).first != SourceMgr.getDecomposedLoc(Existing->DefinitionLoc).first; - if (LoadedFromASTFile || Inferred || PartOfFramework || ParsedAsMainInput) { + // TODO: Remove this check when we can avoid loading module maps multiple + // times. + bool SameModuleDecl = ModuleNameLoc == Existing->DefinitionLoc; + if (LoadedFromASTFile || Inferred || PartOfFramework || ParsedAsMainInput || + SameModuleDecl) { ActiveModule = PreviousActiveModule; // Skip the module definition. return; @@ -2103,7 +2187,18 @@ void ModuleMapLoader::handleInferredModuleDecl( } } -bool ModuleMapLoader::loadModuleMapFile() { +bool ModuleMapLoader::loadModuleDecl(const modulemap::ModuleDecl &MD) { + handleModuleDecl(MD); + return HadError; +} + +bool ModuleMapLoader::loadExternModuleDecl( + const modulemap::ExternModuleDecl &EMD) { + handleExternModuleDecl(EMD); + return HadError; +} + +bool ModuleMapLoader::loadModuleMapFile(const modulemap::ModuleMapFile &MMF) { for (const auto &Decl : MMF.Decls) { std::visit( llvm::makeVisitor( @@ -2116,6 +2211,28 @@ bool ModuleMapLoader::loadModuleMapFile() { return HadError; } +Module *ModuleMap::findOrLoadModule(StringRef Name) { + llvm::StringMap<Module *>::const_iterator Known = Modules.find(Name); + if (Known != Modules.end()) + return Known->getValue(); + + auto ParsedMod = ParsedModules.find(Name); + if (ParsedMod == ParsedModules.end()) + return nullptr; + + Diags.Report(diag::remark_mmap_load_module) << Name; + + for (const auto &ModuleDecl : ParsedMod->second) { + const modulemap::ModuleMapFile &MMF = *ModuleDecl.first; + ModuleMapLoader Loader(SourceMgr, Diags, const_cast<ModuleMap &>(*this), + MMF.ID, *MMF.Dir, MMF.IsSystem); + if (Loader.loadModuleDecl(*ModuleDecl.second)) + return nullptr; + } 
+ + return findModule(Name); +} + bool ModuleMap::loadModuleMapFile(FileEntryRef File, bool IsSystem, DirectoryEntryRef Dir, FileID ID, unsigned *Offset, @@ -2128,9 +2245,16 @@ bool ModuleMap::loadModuleMapFile(FileEntryRef File, bool IsSystem, // If the module map file wasn't already entered, do so now. if (ID.isInvalid()) { - auto FileCharacter = - IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap; - ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter); + ID = SourceMgr.translateFile(File); + // TODO: The way we compute affecting module maps requires this to be a + // local FileID. This should be changed to reuse loaded FileIDs when + // available, and change the way that affecting module maps are + // computed to not require this. + if (ID.isInvalid() || SourceMgr.isLoadedFileID(ID)) { + auto FileCharacter = + IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap; + ID = SourceMgr.createFileID(File, ExternModuleLoc, FileCharacter); + } } assert(Target && "Missing target information"); @@ -2144,8 +2268,9 @@ bool ModuleMap::loadModuleMapFile(FileEntryRef File, bool IsSystem, modulemap::parseModuleMap(ID, Dir, SourceMgr, Diags, IsSystem, Offset); bool Result = false; if (MMF) { - ModuleMapLoader Loader(*MMF, SourceMgr, Diags, *this, ID, Dir, IsSystem); - Result = Loader.loadModuleMapFile(); + Diags.Report(diag::remark_mmap_load) << File.getName(); + ModuleMapLoader Loader(SourceMgr, Diags, *this, ID, Dir, IsSystem); + Result = Loader.loadModuleMapFile(*MMF); } LoadedModuleMap[File] = Result; diff --git a/clang/lib/Lex/ModuleMapFile.cpp b/clang/lib/Lex/ModuleMapFile.cpp index 5cf4a4c3d96c1..f457de85243cc 100644 --- a/clang/lib/Lex/ModuleMapFile.cpp +++ b/clang/lib/Lex/ModuleMapFile.cpp @@ -169,7 +169,10 @@ modulemap::parseModuleMap(FileID ID, clang::DirectoryEntryRef Dir, if (Failed) return std::nullopt; + Parser.MMF.ID = ID; + Parser.MMF.Dir = Dir; Parser.MMF.Start = Start; + Parser.MMF.IsSystem = IsSystem; return 
std::move(Parser.MMF); } diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 76589bff40be9..7549d09e9fb27 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -393,7 +393,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, case ModuleDeclKind::PartitionInterface: { // We can't have parsed or imported a definition of this module or parsed a // module map defining it already. - if (auto *M = Map.findModule(ModuleName)) { + if (auto *M = Map.findOrLoadModule(ModuleName)) { Diag(Path[0].second, diag::err_module_redefinition) << ModuleName; if (M->DefinitionLoc.isValid()) Diag(M->DefinitionLoc, diag::note_prev_module_definition); diff --git a/clang/test/Modules/Inputs/shadow/A1/A1.h b/clang/test/Modules/Inputs/shadow/A1/A1.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Modules/Inputs/shadow/A1/module.modulemap b/clang/test/Modules/Inputs/shadow/A1/module.modulemap index 9439a431b1dbe..3a47280776ae2 100644 --- a/clang/test/Modules/Inputs/shadow/A1/module.modulemap +++ b/clang/test/Modules/Inputs/shadow/A1/module.modulemap @@ -2,4 +2,6 @@ module A { header "A.h" } -module A1 {} +module A1 { + header "A1.h" +} diff --git a/clang/test/Modules/Inputs/shadow/A2/A2.h b/clang/test/Modules/Inputs/shadow/A2/A2.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Modules/Inputs/shadow/A2/module.modulemap b/clang/test/Modules/Inputs/shadow/A2/module.modulemap index 935d89bb425e0..9e6fe6448ead8 100644 --- a/clang/test/Modules/Inputs/shadow/A2/module.modulemap +++ b/clang/test/Modules/Inputs/shadow/A2/module.modulemap @@ -2,4 +2,6 @@ module A { header "A.h" } -module A2 {} +module A2 { + header "A2.h" +} diff --git a/clang/test/Modules/lazy-by-name-lookup.c b/clang/test/Modules/lazy-by-name-lookup.c new file mode 100644 index 0000000000000..11a3a5cda709d --- /dev/null +++ b/clang/test/Modules/lazy-by-name-lookup.c @@ -0,0 +1,31 @@ +// 
RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -I%t \ +// RUN: -fmodules-cache-path=%t/cache %t/tu.c -fsyntax-only -Rmodule-map \ +// RUN: -verify + +//--- module.modulemap + +module A { + header "A.h" +} + +module B { + header "B.h" +} + +//--- A.h + +//--- B.h + +//--- tu.c + +#pragma clang __debug module_lookup A // does module map search for A +#pragma clang __debug module_map A // A is now in the ModuleMap, +#pragma clang __debug module_map B // expected-warning{{unknown module 'B'}} + // but B isn't. +#include <B.h> // Now load B via header search + +// expected-remark@*{{parsing modulemap}} +// expected-remark@*{{loading parsed module 'A'}} +// expected-remark@*{{loading modulemap}} \ No newline at end of file diff --git a/clang/test/Modules/shadow.m b/clang/test/Modules/shadow.m index 44320af2b0c66..c45d0185d4d80 100644 --- a/clang/test/Modules/shadow.m +++ b/clang/test/Modules/shadow.m @@ -1,13 +1,14 @@ // RUN: rm -rf %t -// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/shadow/A1 -I %S/Inputs/shadow/A2 %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION -// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -fmodule-map-file=%S/Inputs/shadow/A2/module.modulemap %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs/shadow/A1 -I %S/Inputs/shadow/A2 -I %S/Inputs/shadow %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION +// RUN: not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -fmodule-map-file=%S/Inputs/shadow/A2/module.modulemap %S/Inputs/shadow %s -fsyntax-only 2>&1 | FileCheck %s -check-prefix=REDEFINITION // REDEFINITION: error: redefinition of module 'A' // REDEFINITION: note: 
previously defined -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -I %S/Inputs/shadow %s -verify +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c-header %S/Inputs/shadow/A1/module.modulemap -emit-module -o %t/A.pcm -fmodule-name=A +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodule-map-file=%S/Inputs/shadow/A1/module.modulemap -fmodule-file=A=%t/A.pcm -I %S/Inputs/shadow %s -verify -@import A1; -@import A2; +#import "A1/A1.h" +#import "A2/A2.h" @import A; #import "A2/A.h" // expected-note {{implicitly imported}} >From 4aaf889f2281c801fd299856958e6134a83c5404 Mon Sep 17 00:00:00 2001 From: Michael Spencer <bigchees...@gmail.com> Date: Wed, 26 Mar 2025 14:48:19 -0700 Subject: [PATCH 2/2] [llvm][clang] Allocate a new stack instead of spawning a new thread to get more stack space Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now. 
--- clang/docs/ReleaseNotes.rst | 4 + clang/include/clang/Basic/Stack.h | 5 +- clang/lib/Basic/Stack.cpp | 40 ++---- clang/lib/Frontend/CompilerInstance.cpp | 2 +- .../llvm/Support/CrashRecoveryContext.h | 3 + llvm/include/llvm/Support/ProgramStack.h | 45 +++++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/CrashRecoveryContext.cpp | 19 +++ llvm/lib/Support/ProgramStack.cpp | 123 ++++++++++++++++++ llvm/unittests/Support/CMakeLists.txt | 1 + llvm/unittests/Support/ProgramStackTest.cpp | 29 +++++ 11 files changed, 242 insertions(+), 30 deletions(-) create mode 100644 llvm/include/llvm/Support/ProgramStack.h create mode 100644 llvm/lib/Support/ProgramStack.cpp create mode 100644 llvm/unittests/Support/ProgramStackTest.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 04ec2cfef679c..2f6bf1c5ebf87 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -158,6 +158,10 @@ Non-comprehensive list of changes in this release - Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719). - Added `__builtin_elementwise_exp10`. +- Clang itself now uses split stacks instead of threads for allocating more + stack space when running on Apple AArch64 based platforms. This means that + stack traces of Clang from debuggers, crashes, and profilers may look + different than before. New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Stack.h b/clang/include/clang/Basic/Stack.h index 30ebd94aedd1f..9674b9d9b62c3 100644 --- a/clang/include/clang/Basic/Stack.h +++ b/clang/include/clang/Basic/Stack.h @@ -27,7 +27,10 @@ namespace clang { /// Call this once on each thread, as soon after starting the thread as /// feasible, to note the approximate address of the bottom of the stack. - void noteBottomOfStack(); + /// + /// \param ForceSet set to true if you know the call is near the bottom of a + /// new stack. Used for split stacks. 
+ void noteBottomOfStack(bool ForceSet = false); /// Determine whether the stack is nearly exhausted. bool isStackNearlyExhausted(); diff --git a/clang/lib/Basic/Stack.cpp b/clang/lib/Basic/Stack.cpp index aa15d8e66950f..8cbb84943f8d3 100644 --- a/clang/lib/Basic/Stack.cpp +++ b/clang/lib/Basic/Stack.cpp @@ -13,33 +13,13 @@ #include "clang/Basic/Stack.h" #include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/ProgramStack.h" -#ifdef _MSC_VER -#include <intrin.h> // for _AddressOfReturnAddress -#endif +static LLVM_THREAD_LOCAL uintptr_t BottomOfStack = 0; -static LLVM_THREAD_LOCAL void *BottomOfStack = nullptr; - -static void *getStackPointer() { -#if __GNUC__ || __has_builtin(__builtin_frame_address) - return __builtin_frame_address(0); -#elif defined(_MSC_VER) - return _AddressOfReturnAddress(); -#else - char CharOnStack = 0; - // The volatile store here is intended to escape the local variable, to - // prevent the compiler from optimizing CharOnStack into anything other - // than a char on the stack. - // - // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19. 
- char *volatile Ptr = &CharOnStack; - return Ptr; -#endif -} - -void clang::noteBottomOfStack() { - if (!BottomOfStack) - BottomOfStack = getStackPointer(); +void clang::noteBottomOfStack(bool ForceSet) { + if (!BottomOfStack || ForceSet) + BottomOfStack = llvm::getStackPointer(); } bool clang::isStackNearlyExhausted() { @@ -51,7 +31,8 @@ bool clang::isStackNearlyExhausted() { if (!BottomOfStack) return false; - intptr_t StackDiff = (intptr_t)getStackPointer() - (intptr_t)BottomOfStack; + intptr_t StackDiff = + (intptr_t)llvm::getStackPointer() - (intptr_t)BottomOfStack; size_t StackUsage = (size_t)std::abs(StackDiff); // If the stack pointer has a surprising value, we do not understand this @@ -66,9 +47,12 @@ bool clang::isStackNearlyExhausted() { void clang::runWithSufficientStackSpaceSlow(llvm::function_ref<void()> Diag, llvm::function_ref<void()> Fn) { llvm::CrashRecoveryContext CRC; - CRC.RunSafelyOnThread([&] { - noteBottomOfStack(); + // Preserve the BottomOfStack in case RunSafelyOnNewStack uses split stacks. + uintptr_t PrevBottom = BottomOfStack; + CRC.RunSafelyOnNewStack([&] { + noteBottomOfStack(true); Diag(); Fn(); }, DesiredStackSize); + BottomOfStack = PrevBottom; } diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index a8c8f37306572..348342eed9eaa 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1276,7 +1276,7 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, // Execute the action to actually build the module in-place. Use a separate // thread so that we get a stack large enough. 
- bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread( + bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack( [&]() { GenerateModuleFromModuleMapAction Action; Instance.ExecuteAction(Action); diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h index 26ddf97b3ef02..31293d6715757 100644 --- a/llvm/include/llvm/Support/CrashRecoveryContext.h +++ b/llvm/include/llvm/Support/CrashRecoveryContext.h @@ -97,6 +97,9 @@ class CrashRecoveryContext { return RunSafelyOnThread([&]() { Fn(UserData); }, RequestedStackSize); } + bool RunSafelyOnNewStack(function_ref<void()>, + unsigned RequestedStackSize = 0); + /// Explicitly trigger a crash recovery in the current process, and /// return failure from RunSafely(). This function does not return. [[noreturn]] void HandleExit(int RetCode); diff --git a/llvm/include/llvm/Support/ProgramStack.h b/llvm/include/llvm/Support/ProgramStack.h new file mode 100644 index 0000000000000..cc8fe98d7a8d1 --- /dev/null +++ b/llvm/include/llvm/Support/ProgramStack.h @@ -0,0 +1,45 @@ +//===--- ProgramStack.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PROGRAMSTACK_H +#define LLVM_SUPPORT_PROGRAMSTACK_H + +#include "llvm/ADT/STLFunctionalExtras.h" + +namespace llvm { + +/// \returns an address close to the current value of the stack pointer. +/// +/// The value is not guaranteed to point to anything specific. It can be used to +/// estimate how much stack space has been used since the previous call. +uintptr_t getStackPointer(); + +/// \returns the default stack size for this platform. +/// +/// Based on \p RLIMIT_STACK or the equivalent. 
+unsigned getDefaultStackSize(); + +/// Runs Fn on a new stack of at least the given size. +/// +/// \param StackSize requested stack size. A size of 0 uses the default stack +/// size of the platform. +/// +/// The preferred implementation is split stacks on platforms that have a good +/// debugging experience for them. On other platforms a new thread is used. +void runOnNewStack(unsigned StackSize, function_ref<void()> Fn); + +template <typename R, typename... Ts> +R runOnNewStack(unsigned StackSize, function_ref<R(Ts...)> Fn, Ts &&...Args) { + std::optional<R> Ret; + runOnNewStack(StackSize, [&]() { Ret = Fn(std::forward<Ts>(Args)...); }); + return std::move(*Ret); +} + +} // namespace llvm + +#endif // LLVM_SUPPORT_PROGRAMSTACK_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 2754c97fce6c1..8e4503a1fc84f 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -294,6 +294,7 @@ add_llvm_component_library(LLVMSupport Path.cpp Process.cpp Program.cpp + ProgramStack.cpp RWMutex.cpp Signals.cpp Threading.cpp diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp index f53aea177d612..ca0c8744a398c 100644 --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -10,6 +10,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ExitCodes.h" +#include "llvm/Support/ProgramStack.h" #include "llvm/Support/Signals.h" #include "llvm/Support/thread.h" #include <cassert> @@ -523,3 +524,21 @@ bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn, CRC->setSwitchedThread(); return Info.Result; } + +bool CrashRecoveryContext::RunSafelyOnNewStack(function_ref<void()> Fn, + unsigned RequestedStackSize) { + // If crash recovery is disabled, do nothing. 
+ if (gCrashRecoveryEnabled) { + assert(!Impl && "Crash recovery context already initialized!"); + CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this); + Impl = CRCI; + + CRCI->ValidJumpBuffer = true; + if (setjmp(CRCI->JumpBuffer) != 0) { + return false; + } + } + + runOnNewStack(RequestedStackSize, Fn); + return true; +} diff --git a/llvm/lib/Support/ProgramStack.cpp b/llvm/lib/Support/ProgramStack.cpp new file mode 100644 index 0000000000000..cc4a356d63918 --- /dev/null +++ b/llvm/lib/Support/ProgramStack.cpp @@ -0,0 +1,123 @@ +//===--- ProgramStack.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ProgramStack.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" + +#ifdef LLVM_ON_UNIX +# include <sys/resource.h> // for getrlimit +#endif + +#ifdef _MSC_VER +# include <intrin.h> // for _AddressOfReturnAddress +#endif + +// Currently only Apple AArch64 is known to support split stacks in the debugger +// and other tooling. 
+#if defined(__APPLE__) && defined(__aarch64__) && \ + LLVM_HAS_CPP_ATTRIBUTE(gnu::naked) && __has_extension(gnu_asm) +# define LLVM_HAS_SPLIT_STACKS +# define LLVM_HAS_SPLIT_STACKS_AARCH64 +#include <sys/mman.h> +#endif + +#ifndef LLVM_HAS_SPLIT_STACKS +# include "llvm/Support/thread.h" +#endif + +using namespace llvm; + +uintptr_t llvm::getStackPointer() { +#if __GNUC__ || __has_builtin(__builtin_frame_address) + return (uintptr_t)__builtin_frame_address(0); +#elif defined(_MSC_VER) + return (uintptr_t)_AddressOfReturnAddress(); +#else + volatile char CharOnStack = 0; + // The volatile store here is intended to escape the local variable, to + // prevent the compiler from optimizing CharOnStack into anything other + // than a char on the stack. + // + // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19. + char *volatile Ptr = &CharOnStack; + return (uintptr_t)Ptr; +#endif +} + +unsigned llvm::getDefaultStackSize() { +#ifdef LLVM_ON_UNIX + rlimit RL; + getrlimit(RLIMIT_STACK, &RL); + return RL.rlim_cur; +#else + // Clang recursively parses, instantiates templates, and evaluates constant + // expressions. We've found 8MiB to be a reasonable stack size given the way + // Clang works and the way C++ is commonly written. 
+ return 8 << 20; +#endif +} + +namespace { +#ifdef LLVM_HAS_SPLIT_STACKS_AARCH64 +[[gnu::naked]] void runOnNewStackImpl(void *Stack, void (*Fn)(void *), + void *Ctx) { + __asm__ volatile( + "mov x16, sp\n\t" + "sub x0, x0, #0x20\n\t" // subtract space from stack + "stp xzr, x16, [x0, #0x00]\n\t" // save old sp + "stp x29, x30, [x0, #0x10]\n\t" // save fp, lr + "mov sp, x0\n\t" // switch to new stack + "add x29, x0, #0x10\n\t" // switch to new frame + ".cfi_def_cfa w29, 16\n\t" + ".cfi_offset w30, -8\n\t" // lr + ".cfi_offset w29, -16\n\t" // fp + + "mov x0, x2\n\t" // Ctx is the only argument + "blr x1\n\t" // call Fn + + "ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr + "ldp xzr, x16, [sp, #0x00]\n\t" // load old sp + "mov sp, x16\n\t" + "ret" + ); +} +#endif + +#ifdef LLVM_HAS_SPLIT_STACKS +void callback(void *Ctx) { + (*reinterpret_cast<function_ref<void()> *>(Ctx))(); +} +#endif +} // namespace + +#ifdef LLVM_HAS_SPLIT_STACKS +void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) { + if (StackSize == 0) + StackSize = getDefaultStackSize(); + + // We use malloc here instead of mmap because: + // - it's simpler, + // - many malloc implementations will reuse the allocation in cases where + // we're bouncing across the edge of a stack boundary, and + // - many malloc implementations will already provide guard pages for + // allocations this large. + void *Stack = malloc(StackSize); + void *BottomOfStack = (char *)Stack + StackSize; + + runOnNewStackImpl(BottomOfStack, callback, &Fn); + + free(Stack); +} +#else +void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) { + llvm::thread Thread( + StackSize == 0 ? 
std::nullopt : std::optional<unsigned>(StackSize), Fn); + Thread.join(); +} +#endif diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 6c4e7cb689b20..e5bf820fb4d1c 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -70,6 +70,7 @@ add_llvm_unittest(SupportTests PerThreadBumpPtrAllocatorTest.cpp ProcessTest.cpp ProgramTest.cpp + ProgramStackTest.cpp RecyclerTest.cpp RegexTest.cpp ReverseIterationTest.cpp diff --git a/llvm/unittests/Support/ProgramStackTest.cpp b/llvm/unittests/Support/ProgramStackTest.cpp new file mode 100644 index 0000000000000..1b4a071739139 --- /dev/null +++ b/llvm/unittests/Support/ProgramStackTest.cpp @@ -0,0 +1,29 @@ +//===- unittest/Support/ProgramStackTest.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ProgramStack.h" +#include "llvm/Support/Process.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static uintptr_t func(int &A) { + A = 7; + return getStackPointer(); +} + +TEST(ProgramStackTest, runOnNewStack) { + int A = 0; + uintptr_t Stack = runOnNewStack(0, function_ref<uintptr_t(int &)>(func), A); + EXPECT_EQ(A, 7); + intptr_t StackDiff = (intptr_t)llvm::getStackPointer() - (intptr_t)Stack; + size_t StackDistance = (size_t)std::abs(StackDiff); + // Page size is used as it's large enough to guarantee we're not on the same + // stack but not too large to cause spurious failures. + EXPECT_GT(StackDistance, llvm::sys::Process::getPageSizeEstimate()); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits