https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/89992
None >From e16becb9357fd560a1c9c673389e6dbba02e024c Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Wed, 24 Apr 2024 11:12:28 -0700 Subject: [PATCH] [clang][modules] Allow including module maps to be non-affecting --- .../include/clang/Serialization/ASTBitCodes.h | 4 +- clang/include/clang/Serialization/ASTWriter.h | 9 ++ .../include/clang/Serialization/ModuleFile.h | 1 + clang/lib/Serialization/ASTReader.cpp | 5 +- clang/lib/Serialization/ASTWriter.cpp | 109 ++++++++++++------ .../DependencyScanning/ModuleDepCollector.cpp | 2 +- .../ClangScanDeps/modules-extern-unrelated.m | 17 +-- 7 files changed, 97 insertions(+), 50 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index dcfa4ac0c19677..9e90db513fde44 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 30; +const unsigned VERSION_MAJOR = 31; /// AST file minor version number supported by this version of /// Clang. @@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 30; /// for the previous version could still support reading the new /// version by ignoring new kinds of subblocks), this number /// should be increased. -const unsigned VERSION_MINOR = 1; +const unsigned VERSION_MINOR = 0; /// An ID number that refers to an identifier in an AST file. /// diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 13b4ad4ad2953d..a9012837c30620 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -76,6 +76,10 @@ class StoredDeclsList; class SwitchCase; class Token; +namespace SrcMgr { +class FileInfo; +} // namespace SrcMgr + /// Writes an AST file containing the contents of a translation unit. /// /// The ASTWriter class produces a bitstream containing the serialized @@ -491,6 +495,11 @@ class ASTWriter : public ASTDeserializationListener, /// during \c SourceManager serialization. void computeNonAffectingInputFiles(); + /// Some affecting files can be included from files that are not considered + /// affecting. This function erases such source locations. + SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr, + const SrcMgr::FileInfo &File); + /// Returns an adjusted \c FileID, accounting for any non-affecting input /// files. FileID getAdjustedFileID(FileID FID) const; diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 492c35dceb08d4..84fb2e8b1e4842 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -69,6 +69,7 @@ struct InputFileInfo { bool Overridden; bool Transient; bool TopLevel; + bool TopLevelAmongAffecting; bool ModuleMap; }; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 43b69045bb0543..2a726ee7fb4491 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2444,9 +2444,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) { R.Overridden = static_cast<bool>(Record[3]); R.Transient = static_cast<bool>(Record[4]); R.TopLevel = static_cast<bool>(Record[5]); - R.ModuleMap = static_cast<bool>(Record[6]); + R.TopLevelAmongAffecting = static_cast<bool>(Record[6]); + R.ModuleMap = static_cast<bool>(Record[7]); std::tie(R.FilenameAsRequested, R.Filename) = [&]() { - uint16_t AsRequestedLength = Record[7]; + uint16_t AsRequestedLength = Record[8]; std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str(); std::string Name = Blob.substr(AsRequestedLength).str(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 30195868ca9965..aa235d9bd8d824 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -173,57 +173,53 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { const HeaderSearch &HS = PP.getHeaderSearchInfo(); const ModuleMap &MM = HS.getModuleMap(); - const SourceManager &SourceMgr = PP.getSourceManager(); std::set<const FileEntry *> ModuleMaps; - auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) { - if (!ModuleMaps.insert(F).second) + std::set<const Module *> ProcessedModules; + auto CollectModuleMapsForHierarchy = [&](const Module *M) { + M = M->getTopLevelModule(); + + if (!ProcessedModules.insert(M).second) return; - SourceLocation Loc = SourceMgr.getIncludeLoc(FID); - // The include location of inferred module maps can point into the header - // file that triggered the inferring. Cut off the walk if that's the case. - while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) { - FID = SourceMgr.getFileID(Loc); - F = *SourceMgr.getFileEntryRefForID(FID); - if (!ModuleMaps.insert(F).second) - break; - Loc = SourceMgr.getIncludeLoc(FID); - } - }; - std::set<const Module *> ProcessedModules; - auto CollectIncludingMapsFromAncestors = [&](const Module *M) { - for (const Module *Mod = M; Mod; Mod = Mod->Parent) { - if (!ProcessedModules.insert(Mod).second) - break; + std::queue<const Module *> Q; + Q.push(M); + while (!Q.empty()) { + const Module *Mod = Q.front(); + Q.pop(); + // The containing module map is affecting, because it's being pointed // into by Module::DefinitionLoc. - if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid()) - CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); + if (auto FE = MM.getContainingModuleMapFile(Mod); FE) + ModuleMaps.insert(*FE); // For inferred modules, the module map that allowed inferring is not in // the include chain of the virtual containing module map file. It did // affect the compilation, though. - if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid()) - CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); + if (auto FE = MM.getModuleMapFileForUniquing(Mod); FE) + ModuleMaps.insert(*FE); + + for (auto *SubM : Mod->submodules()) + Q.push(SubM); } }; // Handle all the affecting modules referenced from the root module. + CollectModuleMapsForHierarchy(RootModule); + std::queue<const Module *> Q; Q.push(RootModule); while (!Q.empty()) { - const Module *CurrentModule = Q.front(); + const Module *RootSubmodule = Q.front(); Q.pop(); - CollectIncludingMapsFromAncestors(CurrentModule); - for (const Module *ImportedModule : CurrentModule->Imports) - CollectIncludingMapsFromAncestors(ImportedModule); - for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses) - CollectIncludingMapsFromAncestors(UndeclaredModule); + for (const Module *ImportedModule : RootSubmodule->Imports) + CollectModuleMapsForHierarchy(ImportedModule); + for (const Module *UndeclaredModule : RootSubmodule->UndeclaredUses) + CollectModuleMapsForHierarchy(UndeclaredModule); - for (auto *M : CurrentModule->submodules()) - Q.push(M); + for (auto *SubM : RootSubmodule->submodules()) + Q.push(SubM); } // Handle textually-included headers that belong to other modules. @@ -249,9 +245,39 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { for (const auto &KH : HS.findResolvedModulesForHeader(*File)) if (const Module *M = KH.getModule()) - CollectIncludingMapsFromAncestors(M); + CollectModuleMapsForHierarchy(M); } + // Note: This algorithm doesn't handle all edge-cases. Let's say something + // from \c ModulesToProcess imports X. The algorithm doesn't pick up + // Y.modulemap as affecting in the following cases: + + //--- X.modulemap + // module X { header "X.h" } + // extern module Y "Y.modulemap + //--- Y.modulemap + // module Y { header "Y.h" } + // + // Since SourceManager doesn't keep list of files included from X.modulemap, + // we would need to walk up the include chain from all leaf files to figure + // out if they are reachable from X.modulemap and if so mark all module map + // files in the chain as affecting. + + //--- Y.modulemap + // module Y { header "Y.h" } + // extern module X "X.modulemap" + //--- X.modulemap + // module X { header "X.h" } + // module Y.Sub { header "Y_Sub.h" } + // + // Only considering X.modulemap affecting means that parsing it without having + // access to Y.modulemap fails. There is no way to find out this dependence + // since ModuleMap does not keep a mapping from a module map file to the + // modules it defines. We could always consider the including module map as + // affecting, but that is problematic on MacOS, where X might be something + // like Darwin whose parent module map includes lots of other module maps that + // describe unrelated top-level modules. + return ModuleMaps; } @@ -1631,6 +1657,7 @@ struct InputFileEntry { bool IsTransient; bool BufferOverridden; bool IsTopLevel; + bool IsTopLevelAmongAffecting; bool IsModuleMap; uint32_t ContentHash[2]; @@ -1639,6 +1666,18 @@ struct InputFileEntry { } // namespace +SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr, + const SrcMgr::FileInfo &File) { + SourceLocation IncludeLoc = File.getIncludeLoc(); + if (IncludeLoc.isValid()) { + FileID IncludeFID = SourceMgr.getFileID(IncludeLoc); + assert(IncludeFID.isValid() && "IncludeLoc in invalid file"); + if (!IsSLocAffecting[IncludeFID.ID]) + IncludeLoc = SourceLocation(); + } + return IncludeLoc; +} + void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts) { using namespace llvm; @@ -1654,6 +1693,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level + IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name @@ -1693,6 +1733,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; Entry.IsTopLevel = File.getIncludeLoc().isInvalid(); + Entry.IsTopLevelAmongAffecting = + getAffectingIncludeLoc(SourceMgr, File).isInvalid(); Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); auto ContentHash = hash_code(-1); @@ -1758,6 +1800,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.BufferOverridden, Entry.IsTransient, Entry.IsTopLevel, + Entry.IsTopLevelAmongAffecting, Entry.IsModuleMap, NameAsRequested.size()}; @@ -2219,7 +2262,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); - AddSourceLocation(File.getIncludeLoc(), Record); + AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record); Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding Record.push_back(File.hasLineDirectives()); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index f46324ee9989eb..ae8cb664ca14b5 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MDC.ScanInstance.getASTReader()->visitInputFileInfos( *MF, /*IncludeSystem=*/true, [&](const serialization::InputFileInfo &IFI, bool IsSystem) { - if (!(IFI.TopLevel && IFI.ModuleMap)) + if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap)) return; if (StringRef(IFI.FilenameAsRequested) .ends_with("__inferred_module.map")) diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m index 76611c596d3eff..90c494991343e5 100644 --- a/clang/test/ClangScanDeps/modules-extern-unrelated.m +++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m @@ -22,15 +22,8 @@ //--- second/second.h #include "first_other.h" -//--- cdb.json.template -[{ - "directory": "DIR", - "file": "DIR/tu.m", - "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o" -}] - -// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json -// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: clang-scan-deps -format experimental-full -o %t/result.json \ +// RUN: -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t // CHECK: { @@ -67,11 +60,11 @@ // CHECK-NEXT: ], // CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap", // CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap" // CHECK: ], // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", -// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/second/second.h", // CHECK-NEXT: "[[PREFIX]]/second/second.modulemap" // CHECK-NEXT: ], @@ -90,11 +83,11 @@ // CHECK-NEXT: ], // CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap", // CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap" // CHECK: ], // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", -// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/second/second.modulemap", // CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h" @@ -115,7 +108,7 @@ // CHECK-NEXT: ], // CHECK-NEXT: "command-line": [ // CHECK: ], -// CHECK-NEXT: "executable": "clang", +// CHECK-NEXT: "executable": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m" // CHECK-NEXT: ], _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits