Author: Jan Svoboda Date: 2024-10-28T11:47:59-07:00 New Revision: 6c6351ee350589c8e6bcd69c3255374a714d87d0
URL: https://github.com/llvm/llvm-project/commit/6c6351ee350589c8e6bcd69c3255374a714d87d0 DIFF: https://github.com/llvm/llvm-project/commit/6c6351ee350589c8e6bcd69c3255374a714d87d0.diff LOG: [clang][modules] Optimize construction and usage of the submodule index (#113391) This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload. Added: Modified: clang/include/clang/Basic/Module.h clang/include/clang/Lex/ModuleMap.h clang/lib/Basic/Module.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Serialization/ASTReader.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 1ab3b5e5f81567..dd384c1d76c5fd 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -227,7 +227,7 @@ class alignas(8) Module { /// A mapping from the submodule name to the index into the /// \c SubModules vector at which that submodule resides. - llvm::StringMap<unsigned> SubModuleIndex; + mutable llvm::StringMap<unsigned> SubModuleIndex; /// The AST file if this is a top-level module which has a /// corresponding serialized AST file, or null otherwise. @@ -612,7 +612,6 @@ class alignas(8) Module { void setParent(Module *M) { assert(!Parent); Parent = M; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 75b567a347cb6c..5ee152e4213abf 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -541,11 +541,14 @@ class ModuleMap { /// /// \param IsExplicit Whether this is an explicit submodule. /// - /// \returns The found or newly-created module, along with a boolean value - /// that will be true if the module is newly-created. - std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent, - bool IsFramework, - bool IsExplicit); + /// \returns The found or newly-created module. + Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); + /// Create new submodule, assuming it does not exist. This function can only + /// be called when it is guaranteed that this submodule does not exist yet. + /// The parameters have same semantics as \c ModuleMap::findOrCreateModule. + Module *createModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); /// Create a global module fragment for a C++ module unit. /// diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index a7a3f6b37efef1..330108d5b3e47f 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name, NoUndeclaredIncludes = Parent->NoUndeclaredIncludes; ModuleMapIsPrivate = Parent->ModuleMapIsPrivate; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } } @@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) { } Module *Module::findSubmodule(StringRef Name) const { - llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name); - if (Pos == SubModuleIndex.end()) - return nullptr; + // Add new submodules into the index. + for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I) + SubModuleIndex[SubModules[I]->Name] = I; - return SubModules[Pos->getValue()]; + if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end()) + return SubModules[It->second]; + + return nullptr; } Module *Module::getGlobalModuleFragment() const { diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 201ab91cf68ca1..10774429a2177b 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(SkippedDir.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. @@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(File.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); @@ -857,15 +857,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ return Context->findSubmodule(Name); } -std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, - Module *Parent, - bool IsFramework, - bool IsExplicit) { +Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { // Try to find an existing module with this name. if (Module *Sub = lookupModuleQualified(Name, Parent)) - return std::make_pair(Sub, false); + return Sub; // Create a new module with this name. + return createModule(Name, Parent, IsFramework, IsExplicit); +} + +Module *ModuleMap::createModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + assert(lookupModuleQualified(Name, Parent) == nullptr && + "Creating duplicate submodule"); + Module *Result = new (ModulesAlloc.Allocate()) Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent, IsFramework, IsExplicit, NumCreatedModules++); @@ -875,7 +881,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, Modules[Name] = Result; ModuleScopeIDs[Result] = CurrentModuleScopeID; } - return std::make_pair(Result, true); + return Result; } Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc, @@ -2124,8 +2130,7 @@ void ModuleMapParser::parseModuleDecl() { Map.createShadowedModule(ModuleName, Framework, ShadowingModule); } else { ActiveModule = - Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit) - .first; + Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit); } ActiveModule->DefinitionLoc = ModuleNameLoc; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2419ed84e68acf..74a79ac54bb4eb 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, return Err; ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; + // If we don't know the top-level module, there's no point in doing qualified + // lookup of its submodules; it won't find anything anywhere within this tree. + // Let's skip that and avoid some string lookups. + auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule + : &ModuleMap::findOrCreateModule; + bool First = true; Module *CurrentModule = nullptr; RecordData Record; @@ -5829,11 +5836,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, if (Parent) ParentModule = getSubmodule(Parent); - // Retrieve this (sub)module from the module map, creating it if - // necessary. - CurrentModule = - ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) - .first; + CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule, + IsFramework, IsExplicit); SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; if (GlobalIndex >= SubmodulesLoaded.size() || _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits