https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/89992
>From 8c2989ffe7b6e61b32facf831aa7eeabbf0001d2 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Wed, 24 Apr 2024 11:12:28 -0700 Subject: [PATCH 1/2] [clang][modules] Allow including module maps to be non-affecting --- .../include/clang/Serialization/ASTBitCodes.h | 4 +- clang/include/clang/Serialization/ASTWriter.h | 9 ++ .../include/clang/Serialization/ModuleFile.h | 1 + clang/lib/Serialization/ASTReader.cpp | 5 +- clang/lib/Serialization/ASTWriter.cpp | 93 ++++++++++++------- .../DependencyScanning/ModuleDepCollector.cpp | 2 +- .../ClangScanDeps/modules-extern-unrelated.m | 20 ++-- 7 files changed, 86 insertions(+), 48 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a8df5a0bda0850..d1939bf65bfbb5 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -42,7 +42,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 30; +const unsigned VERSION_MAJOR = 31; /// AST file minor version number supported by this version of /// Clang. @@ -52,7 +52,7 @@ const unsigned VERSION_MAJOR = 30; /// for the previous version could still support reading the new /// version by ignoring new kinds of subblocks), this number /// should be increased. -const unsigned VERSION_MINOR = 1; +const unsigned VERSION_MINOR = 0; /// An ID number that refers to an identifier in an AST file. /// diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 6c45b7348b8552..247e7943a45bc9 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -76,6 +76,10 @@ class StoredDeclsList; class SwitchCase; class Token; +namespace SrcMgr { +class FileInfo; +} // namespace SrcMgr + /// Writes an AST file containing the contents of a translation unit. /// /// The ASTWriter class produces a bitstream containing the serialized @@ -490,6 +494,11 @@ class ASTWriter : public ASTDeserializationListener, /// during \c SourceManager serialization. void computeNonAffectingInputFiles(); + /// Some affecting files can be included from files that are not affecting. + /// This function erases source locations pointing into such files. + SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr, + const SrcMgr::FileInfo &File); + /// Returns an adjusted \c FileID, accounting for any non-affecting input /// files. FileID getAdjustedFileID(FileID FID) const; diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 25f644e76edb1a..d24c293648c62c 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -69,6 +69,7 @@ struct InputFileInfo { bool Overridden; bool Transient; bool TopLevel; + bool TopLevelAmongAffecting; bool ModuleMap; }; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0ef57a3ea804ef..aafc695022aa8c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2444,9 +2444,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) { R.Overridden = static_cast<bool>(Record[3]); R.Transient = static_cast<bool>(Record[4]); R.TopLevel = static_cast<bool>(Record[5]); - R.ModuleMap = static_cast<bool>(Record[6]); + R.TopLevelAmongAffecting = static_cast<bool>(Record[6]); + R.ModuleMap = static_cast<bool>(Record[7]); std::tie(R.FilenameAsRequested, R.Filename) = [&]() { - uint16_t AsRequestedLength = Record[7]; + uint16_t AsRequestedLength = Record[8]; std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str(); std::string Name = Blob.substr(AsRequestedLength).str(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 0408eeb6a95b00..dcf093e5e659ce 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -173,54 +173,50 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { const HeaderSearch &HS = PP.getHeaderSearchInfo(); const ModuleMap &MM = HS.getModuleMap(); - const SourceManager &SourceMgr = PP.getSourceManager(); std::set<const FileEntry *> ModuleMaps; - auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) { - if (!ModuleMaps.insert(F).second) + std::set<const Module *> ProcessedModules; + auto CollectModuleMapsForHierarchy = [&](const Module *M) { + M = M->getTopLevelModule(); + + if (!ProcessedModules.insert(M).second) return; - SourceLocation Loc = SourceMgr.getIncludeLoc(FID); - // The include location of inferred module maps can point into the header - // file that triggered the inferring. Cut off the walk if that's the case. - while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) { - FID = SourceMgr.getFileID(Loc); - F = *SourceMgr.getFileEntryRefForID(FID); - if (!ModuleMaps.insert(F).second) - break; - Loc = SourceMgr.getIncludeLoc(FID); - } - }; - std::set<const Module *> ProcessedModules; - auto CollectIncludingMapsFromAncestors = [&](const Module *M) { - for (const Module *Mod = M; Mod; Mod = Mod->Parent) { - if (!ProcessedModules.insert(Mod).second) - break; + std::queue<const Module *> Q; + Q.push(M); + while (!Q.empty()) { + const Module *Mod = Q.front(); + Q.pop(); + // The containing module map is affecting, because it's being pointed // into by Module::DefinitionLoc. - if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid()) - CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); - // For inferred modules, the module map that allowed inferring is not in - // the include chain of the virtual containing module map file. It did - // affect the compilation, though. - if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid()) - CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID)); + if (auto FE = MM.getContainingModuleMapFile(Mod)) + ModuleMaps.insert(*FE); + // For inferred modules, the module map that allowed inferring is not + // related to the virtual containing module map file. It did affect the + // compilation, though. + if (auto FE = MM.getModuleMapFileForUniquing(Mod)) + ModuleMaps.insert(*FE); + + for (auto *SubM : Mod->submodules()) + Q.push(SubM); } }; // Handle all the affecting modules referenced from the root module. + CollectModuleMapsForHierarchy(RootModule); + std::queue<const Module *> Q; Q.push(RootModule); while (!Q.empty()) { const Module *CurrentModule = Q.front(); Q.pop(); - CollectIncludingMapsFromAncestors(CurrentModule); for (const Module *ImportedModule : CurrentModule->Imports) - CollectIncludingMapsFromAncestors(ImportedModule); + CollectModuleMapsForHierarchy(ImportedModule); for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses) - CollectIncludingMapsFromAncestors(UndeclaredModule); + CollectModuleMapsForHierarchy(UndeclaredModule); for (auto *M : CurrentModule->submodules()) Q.push(M); @@ -249,9 +245,27 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { for (const auto &KH : HS.findResolvedModulesForHeader(*File)) if (const Module *M = KH.getModule()) - CollectIncludingMapsFromAncestors(M); + CollectModuleMapsForHierarchy(M); } + // FIXME: This algorithm is not correct for module map hierarchies where + // module map file defining a (sub)module of a top-level module X includes + // a module map file that defines a (sub)module of another top-level module Y. + // Whenever X is affecting and Y is not, "replaying" this PCM file will fail + // when parsing module map files for X due to not knowing about the `extern` + // module map for Y. + // + // We don't have a good way to fix it here. We could mark all children of + // affecting module map files as being affecting as well, but that's + // expensive. SourceManager does not model the edge from parent to child + // SLocEntries, so instead, we would need to iterate over leaf module map + // files, walk up their include hierarchy and check whether we arrive at an + // affecting module map. + // + // Instead of complicating and slowing down this function, we should probably + // just ban module map hierarchies where module map defining a (sub)module X + // includes a module map defining a module that's not a submodule of X. + return ModuleMaps; } @@ -1631,6 +1645,7 @@ struct InputFileEntry { bool IsTransient; bool BufferOverridden; bool IsTopLevel; + bool IsTopLevelAmongAffecting; bool IsModuleMap; uint32_t ContentHash[2]; @@ -1639,6 +1654,18 @@ struct InputFileEntry { } // namespace +SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr, + const SrcMgr::FileInfo &File) { + SourceLocation IncludeLoc = File.getIncludeLoc(); + if (IncludeLoc.isValid()) { + FileID IncludeFID = SourceMgr.getFileID(IncludeLoc); + assert(IncludeFID.isValid() && "IncludeLoc in invalid file"); + if (!IsSLocAffecting[IncludeFID.ID]) + IncludeLoc = SourceLocation(); + } + return IncludeLoc; +} + void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts) { using namespace llvm; @@ -1654,6 +1681,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level + IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name @@ -1693,6 +1721,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; Entry.IsTopLevel = File.getIncludeLoc().isInvalid(); + Entry.IsTopLevelAmongAffecting = + getAffectingIncludeLoc(SourceMgr, File).isInvalid(); Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); auto ContentHash = hash_code(-1); @@ -1758,6 +1788,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.BufferOverridden, Entry.IsTransient, Entry.IsTopLevel, + Entry.IsTopLevelAmongAffecting, Entry.IsModuleMap, NameAsRequested.size()}; @@ -2219,7 +2250,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); - AddSourceLocation(File.getIncludeLoc(), Record); + AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record); Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding Record.push_back(File.hasLineDirectives()); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index f46324ee9989eb..ae8cb664ca14b5 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MDC.ScanInstance.getASTReader()->visitInputFileInfos( *MF, /*IncludeSystem=*/true, [&](const serialization::InputFileInfo &IFI, bool IsSystem) { - if (!(IFI.TopLevel && IFI.ModuleMap)) + if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap)) return; if (StringRef(IFI.FilenameAsRequested) .ends_with("__inferred_module.map")) diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m index 76611c596d3eff..23ddbb0a681203 100644 --- a/clang/test/ClangScanDeps/modules-extern-unrelated.m +++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m @@ -1,3 +1,6 @@ +// This test checks that parent module maps that do not define any related +// modules are not affecting. + // RUN: rm -rf %t // RUN: split-file %s %t @@ -22,15 +25,8 @@ //--- second/second.h #include "first_other.h" -//--- cdb.json.template -[{ - "directory": "DIR", - "file": "DIR/tu.m", - "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o" -}] - -// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json -// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: clang-scan-deps -format experimental-full -o %t/result.json \ +// RUN: -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t // CHECK: { @@ -67,11 +63,11 @@ // CHECK-NEXT: ], // CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap", // CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap" // CHECK: ], // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", -// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/second/second.h", // CHECK-NEXT: "[[PREFIX]]/second/second.modulemap" // CHECK-NEXT: ], @@ -90,11 +86,11 @@ // CHECK-NEXT: ], // CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap", // CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap" // CHECK: ], // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", -// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/second/second.modulemap", // CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap", // CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h" @@ -115,7 +111,7 @@ // CHECK-NEXT: ], // CHECK-NEXT: "command-line": [ // CHECK: ], -// CHECK-NEXT: "executable": "clang", +// CHECK-NEXT: "executable": "{{.*}}", // CHECK-NEXT: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m" // CHECK-NEXT: ], >From 6d435c0c95e7c78f081c6833bf9bf6a9d9f881eb Mon Sep 17 00:00:00 2001 From: Jan Svoboda <jan_svob...@apple.com> Date: Fri, 26 Apr 2024 12:01:02 -0700 Subject: [PATCH 2/2] Squash the new logic into the existing bit in PCM --- clang/include/clang/Serialization/ASTBitCodes.h | 4 ++-- clang/include/clang/Serialization/ModuleFile.h | 1 - clang/lib/Serialization/ASTReader.cpp | 5 ++--- clang/lib/Serialization/ASTWriter.cpp | 6 +----- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp | 2 +- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index d1939bf65bfbb5..a8df5a0bda0850 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -42,7 +42,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 31; +const unsigned VERSION_MAJOR = 30; /// AST file minor version number supported by this version of /// Clang. @@ -52,7 +52,7 @@ const unsigned VERSION_MAJOR = 31; /// for the previous version could still support reading the new /// version by ignoring new kinds of subblocks), this number /// should be increased. -const unsigned VERSION_MINOR = 0; +const unsigned VERSION_MINOR = 1; /// An ID number that refers to an identifier in an AST file. /// diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index d24c293648c62c..25f644e76edb1a 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -69,7 +69,6 @@ struct InputFileInfo { bool Overridden; bool Transient; bool TopLevel; - bool TopLevelAmongAffecting; bool ModuleMap; }; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index aafc695022aa8c..0ef57a3ea804ef 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2444,10 +2444,9 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) { R.Overridden = static_cast<bool>(Record[3]); R.Transient = static_cast<bool>(Record[4]); R.TopLevel = static_cast<bool>(Record[5]); - R.TopLevelAmongAffecting = static_cast<bool>(Record[6]); - R.ModuleMap = static_cast<bool>(Record[7]); + R.ModuleMap = static_cast<bool>(Record[6]); std::tie(R.FilenameAsRequested, R.Filename) = [&]() { - uint16_t AsRequestedLength = Record[8]; + uint16_t AsRequestedLength = Record[7]; std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str(); std::string Name = Blob.substr(AsRequestedLength).str(); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index dcf093e5e659ce..0dae3df1d5978a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1681,7 +1681,6 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level - IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name @@ -1720,9 +1719,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.IsSystemFile = isSystem(File.getFileCharacteristic()); Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; - Entry.IsTopLevel = File.getIncludeLoc().isInvalid(); - Entry.IsTopLevelAmongAffecting = - getAffectingIncludeLoc(SourceMgr, File).isInvalid(); + Entry.IsTopLevel = getAffectingIncludeLoc(SourceMgr, File).isInvalid(); Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); auto ContentHash = hash_code(-1); @@ -1788,7 +1785,6 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Entry.BufferOverridden, Entry.IsTransient, Entry.IsTopLevel, - Entry.IsTopLevelAmongAffecting, Entry.IsModuleMap, NameAsRequested.size()}; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index ae8cb664ca14b5..f46324ee9989eb 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MDC.ScanInstance.getASTReader()->visitInputFileInfos( *MF, /*IncludeSystem=*/true, [&](const serialization::InputFileInfo &IFI, bool IsSystem) { - if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap)) + if (!(IFI.TopLevel && IFI.ModuleMap)) return; if (StringRef(IFI.FilenameAsRequested) .ends_with("__inferred_module.map")) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits