kadircet created this revision. kadircet added a reviewer: ilya-biryukov. Herald added subscribers: cfe-commits, jdoerfert, arphaman, mgrang, jkorous, MaskRay, ioeric. Herald added a project: clang.
For counting number of references clangd was relying on merging every duplication of a symbol. Unfortunately this does not apply to FileIndex(and one of its users' BackgroundIndex), since we get rid of duplication by simply dropping symbols coming from non-canonical locations. So only one or two(coming from canonical declaration header and defined source file, if exists) replications of the same symbol reaches merging step. This patch changes reference counting logic to rather count number of different RefSlabs a given SymbolID exists. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D59481 Files: clang-tools-extra/clangd/index/FileIndex.cpp clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
Index: clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp +++ clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp @@ -32,6 +32,7 @@ return !arg.IsTU && !arg.URI.empty() && arg.Digest == FileDigest{{0}} && arg.DirectIncludes.empty(); } +MATCHER_P(NumReferences, N, "") { return arg.References == N; } class MemoryShardStorage : public BackgroundIndexStorage { mutable std::mutex StorageMu; @@ -127,20 +128,25 @@ CDB.setCompileCommand(testPath("root/A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); - EXPECT_THAT( - runFuzzyFind(Idx, ""), - UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"), - AllOf(Named("f_b"), Declared(), Not(Defined())))); + EXPECT_THAT(runFuzzyFind(Idx, ""), + UnorderedElementsAre(AllOf(Named("common"), NumReferences(2U)), + AllOf(Named("A_CC"), NumReferences(1U)), + AllOf(Named("g"), NumReferences(0U)), + AllOf(Named("f_b"), Declared(), + Not(Defined()), NumReferences(1U)))); Cmd.Filename = testPath("root/B.cc"); Cmd.CommandLine = {"clang++", Cmd.Filename}; - CDB.setCompileCommand(testPath("root/A.cc"), Cmd); + CDB.setCompileCommand(testPath("root/B.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); // B_CC is dropped as we don't collect symbols from A.h in this compilation. EXPECT_THAT(runFuzzyFind(Idx, ""), - UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"), - AllOf(Named("f_b"), Declared(), Defined()))); + UnorderedElementsAre(AllOf(Named("common"), NumReferences(3U)), + AllOf(Named("A_CC"), NumReferences(1U)), + AllOf(Named("g"), NumReferences(0U)), + AllOf(Named("f_b"), Declared(), Defined(), + NumReferences(2U)))); auto Syms = runFuzzyFind(Idx, "common"); EXPECT_THAT(Syms, UnorderedElementsAre(Named("common"))); Index: clang-tools-extra/clangd/index/FileIndex.cpp =================================================================== --- clang-tools-extra/clangd/index/FileIndex.cpp +++ clang-tools-extra/clangd/index/FileIndex.cpp @@ -27,6 +27,47 @@ namespace clang { namespace clangd { +namespace { +// Puts all refs for the same symbol into RefsStorage in a contigious way. +// If MergedSyms is provided, increments a symbols Reference count once for +// every file it was mentioned in. +void mergeRefs(llvm::ArrayRef<std::shared_ptr<RefSlab>> RefSlabs, + std::vector<Ref> &RefsStorage, + llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> &AllRefs, + llvm::DenseMap<SymbolID, Symbol> *MergedSyms = nullptr) { + llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs; + size_t Count = 0; + for (const auto &RefSlab : RefSlabs) { + for (const auto &Sym : *RefSlab) { + MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end()); + Count += Sym.second.size(); + if (MergedSyms) { + auto It = MergedSyms->find(Sym.first); + assert(It != MergedSyms->end() && "Reference to unknown symbol!"); + // Note that we increment References per each file mentioning the + // symbol, including headers. This only happens once for each file since + // FileIndex keeps only oneslab per file. + // This contradicts with behavior inside clangd-indexer, it only counts + // translation units since it processes each header multiple times. + It->getSecond().References++; + } + } + } + RefsStorage.reserve(Count); + AllRefs.reserve(MergedRefs.size()); + for (auto &Sym : MergedRefs) { + auto &SymRefs = Sym.second; + // Sorting isn't required, but yields more stable results over rebuilds. + llvm::sort(SymRefs); + llvm::copy(SymRefs, back_inserter(RefsStorage)); + AllRefs.try_emplace( + Sym.first, + llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()], + SymRefs.size())); + } +} + +} // namespace static std::pair<SymbolSlab, RefSlab> indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP, @@ -115,6 +156,9 @@ } std::vector<const Symbol *> AllSymbols; std::vector<Symbol> SymsStorage; + std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID. + llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs; + switch (DuplicateHandle) { case DuplicateHandling::Merge: { llvm::DenseMap<SymbolID, Symbol> Merged; @@ -123,8 +167,11 @@ auto I = Merged.try_emplace(Sym.ID, Sym); if (!I.second) I.first->second = mergeSymbol(I.first->second, Sym); + // We re-count number of references while merging refs from scratch. + I.first->getSecond().References = 0; } } + mergeRefs(RefSlabs, RefsStorage, AllRefs, &Merged); SymsStorage.reserve(Merged.size()); for (auto &Sym : Merged) { SymsStorage.push_back(std::move(Sym.second)); @@ -139,34 +186,11 @@ for (const auto &Sym : *Slab) if (AddedSymbols.insert(Sym.ID).second) AllSymbols.push_back(&Sym); + mergeRefs(RefSlabs, RefsStorage, AllRefs); break; } } - std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID. - llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs; - { - llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs; - size_t Count = 0; - for (const auto &RefSlab : RefSlabs) - for (const auto &Sym : *RefSlab) { - MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end()); - Count += Sym.second.size(); - } - RefsStorage.reserve(Count); - AllRefs.reserve(MergedRefs.size()); - for (auto &Sym : MergedRefs) { - auto &SymRefs = Sym.second; - // Sorting isn't required, but yields more stable results over rebuilds. - llvm::sort(SymRefs); - llvm::copy(SymRefs, back_inserter(RefsStorage)); - AllRefs.try_emplace( - Sym.first, - llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()], - SymRefs.size())); - } - } - size_t StorageSize = RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol); for (const auto &Slab : SymbolSlabs)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits