Author: Jan Svoboda Date: 2023-04-21T10:56:42-07:00 New Revision: 6b80f00bac6a4832af4f8ebde96564690a35f7e1
URL: https://github.com/llvm/llvm-project/commit/6b80f00bac6a4832af4f8ebde96564690a35f7e1 DIFF: https://github.com/llvm/llvm-project/commit/6b80f00bac6a4832af4f8ebde96564690a35f7e1.diff LOG: [clang][deps] NFC: Refactor and comment ModuleDeps sorting I once again stumbled across `IndexedModuleID` and was confused by the mutable `InputIndex` field. This patch adds comments around that piece of code and unifies/simplifies related functions. Reviewed By: Bigcheese Differential Revision: https://reviews.llvm.org/D145197 Added: Modified: clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h clang/tools/clang-scan-deps/ClangScanDeps.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h index 24d7337d26e4c..0a5cbc4f046aa 100644 --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -59,7 +59,13 @@ struct ModuleID { std::string ContextHash; bool operator==(const ModuleID &Other) const { - return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash; + return std::tie(ModuleName, ContextHash) == + std::tie(Other.ModuleName, Other.ContextHash); + } + + bool operator<(const ModuleID& Other) const { + return std::tie(ModuleName, ContextHash) < + std::tie(Other.ModuleName, Other.ContextHash); } }; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 93c60eb2cdf59..9018acc8e682f 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -323,11 +323,10 @@ static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) { return llvm::json::Array(Strings); } +// Technically, we don't need to sort the dependency list to get determinism. +// Leaving these be will simply preserve the import order. static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) { - llvm::sort(V, [](const ModuleID &A, const ModuleID &B) { - return std::tie(A.ModuleName, A.ContextHash) < - std::tie(B.ModuleName, B.ContextHash); - }); + llvm::sort(V); llvm::json::Array Ret; for (const ModuleID &MID : V) @@ -406,11 +405,7 @@ class FullDeps { std::vector<IndexedModuleID> ModuleIDs; for (auto &&M : Modules) ModuleIDs.push_back(M.first); - llvm::sort(ModuleIDs, - [](const IndexedModuleID &A, const IndexedModuleID &B) { - return std::tie(A.ID.ModuleName, A.InputIndex) < - std::tie(B.ID.ModuleName, B.InputIndex); - }); + llvm::sort(ModuleIDs); using namespace llvm::json; @@ -470,20 +465,37 @@ class FullDeps { private: struct IndexedModuleID { ModuleID ID; + + // FIXME: This is mutable so that it can still be updated after insertion + // into an unordered associative container. This is "fine", since this + // field doesn't contribute to the hash, but it's a brittle hack. mutable size_t InputIndex; bool operator==(const IndexedModuleID &Other) const { - return ID.ModuleName == Other.ID.ModuleName && - ID.ContextHash == Other.ID.ContextHash; + return std::tie(ID.ModuleName, ID.ContextHash) == + std::tie(Other.ID.ModuleName, Other.ID.ContextHash); } - }; - - struct IndexedModuleIDHasher { - std::size_t operator()(const IndexedModuleID &IMID) const { - using llvm::hash_combine; - return hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash); + bool operator<(const IndexedModuleID &Other) const { + /// We need the output of clang-scan-deps to be deterministic. However, + /// the dependency graph may contain two modules with the same name. How + /// do we decide which one to print first? If we made that decision based + /// on the context hash, the ordering would be deterministic, but + /// diff erent across machines. This can happen for example when the inputs + /// or the SDKs (which both contribute to the "context" hash) live in + /// diff erent absolute locations. We solve that by tracking the index of + /// the first input TU that (transitively) imports the dependency, which + /// is always the same for the same input, resulting in deterministic + /// sorting that's also reproducible across machines. + return std::tie(ID.ModuleName, InputIndex) < + std::tie(Other.ID.ModuleName, Other.InputIndex); } + + struct Hasher { + std::size_t operator()(const IndexedModuleID &IMID) const { + return llvm::hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash); + } + }; }; struct InputDeps { @@ -496,7 +508,7 @@ class FullDeps { }; std::mutex Lock; - std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleIDHasher> + std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher> Modules; std::vector<InputDeps> Inputs; }; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits