llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mc @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Use #<!-- -->102774 to allocate storage for decoded probes (`PseudoProbeVec`) and function records (`InlineTreeVec`). Additionally, probes and inlined function records are allocated contiguously, which allows to keep track of them using `ArrayRef`s. This in turn reduces the size of `MCDecodedPseudoProbeInlineTree`. --- Full diff: https://github.com/llvm/llvm-project/pull/102789.diff 4 Files Affected: - (modified) bolt/lib/Rewrite/PseudoProbeRewriter.cpp (+16-7) - (modified) llvm/include/llvm/MC/MCPseudoProbe.h (+59-12) - (modified) llvm/lib/MC/MCPseudoProbe.cpp (+30-15) - (modified) llvm/tools/llvm-profgen/ProfiledBinary.cpp (+5-5) ``````````diff diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 37a5b937ebcaa..e5ced0f75d187 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() { } unsigned ProbeTrack = AP.second.size(); - std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); + auto Probe = llvm::map_iterator( + AP.second.begin(), + [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); }); while (ProbeTrack != 0) { if (Probe->isBlock()) { Probe->setAddress(BlkOutputAddress); @@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() { } while (CallOutputAddress != CallOutputAddresses.second) { - AP.second.push_back(*Probe); - AP.second.back().setAddress(CallOutputAddress->second); - Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); + ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second); CallOutputAddress = std::next(CallOutputAddress); } } @@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { ProbeDecoder.getDummyInlineRoot(); for (auto Child = Root.getChildren().begin(); Child != Root.getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (auto Inlinee : Inlinees) // INT64_MAX is "placeholder" of unused callsite index field in the pair @@ -362,7 +362,8 @@ void PseudoProbeRewriter::encodePseudoProbes() { if (Probe->getAddress() == INT64_MAX) Deleted++; LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); - uint64_t ProbesSize = Cur->getProbes().size() - Deleted; + size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur); + uint64_t ProbesSize = Cur->Probes.size() + InjectedProbes - Deleted; EmitULEB128IntValue(ProbesSize); // Emit number of direct inlinees EmitULEB128IntValue(Cur->getChildren().size()); @@ -373,10 +374,18 @@ void PseudoProbeRewriter::encodePseudoProbes() { EmitDecodedPseudoProbe(Probe); LastProbe = Probe; } + if (InjectedProbes) { + for (MCDecodedPseudoProbe *&Probe : ProbeDecoder.getInjectedProbes(Cur)) { + if (Probe->getAddress() == INT64_MAX) + continue; + EmitDecodedPseudoProbe(Probe); + LastProbe = Probe; + } + } for (auto Child = Cur->getChildren().begin(); Child != Cur->getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (const auto &Inlinee : Inlinees) { assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 5c536a8ec0b55..5dba797be5c30 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -54,20 +54,21 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/Support/ErrorOr.h" -#include <list> +#include <functional> #include <map> #include <memory> #include <string> #include <tuple> #include <type_traits> #include <unordered_map> -#include <unordered_set> #include <vector> namespace llvm { @@ -103,7 +104,9 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; using GUIDProbeFunctionMap = std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; // Address to pseudo probes map. -using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>; +using AddressProbesMap = + std::map<uint64_t, + std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>; class MCDecodedPseudoProbeInlineTree; @@ -276,17 +279,31 @@ class MCPseudoProbeInlineTree }; // inline tree node for the decoded pseudo probe -class MCDecodedPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, - MCDecodedPseudoProbeInlineTree> { +class MCDecodedPseudoProbeInlineTree { public: - InlineSite ISite; + uint64_t Guid = 0; + uint32_t ProbeId = 0; + + // Caller node of the inline site + MCDecodedPseudoProbeInlineTree *Parent = nullptr; MCDecodedPseudoProbeInlineTree() = default; - MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + MCDecodedPseudoProbeInlineTree(uint64_t Guid, uint32_t ProbeId, + MCDecodedPseudoProbeInlineTree *Parent) + : Guid(Guid), ProbeId(ProbeId), Parent(Parent) {} + + // Track children (e.g. inlinees) of current context + MutableArrayRef<MCDecodedPseudoProbeInlineTree> Children; + // Set of probes that come with the function. + MutableArrayRef<MCDecodedPseudoProbe> Probes; + // Root node has a GUID 0. + bool isRoot() const { return Guid == 0; } // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } + auto getChildren() const { return Children; } + auto getProbes() const { return llvm::make_pointer_range(Probes); } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -336,6 +353,15 @@ class MCPseudoProbeTable { }; class MCPseudoProbeDecoder { + // Decoded pseudo probes vector. + std::vector<MCDecodedPseudoProbe> PseudoProbeVec; + // Injected pseudo probes, identified by the containing inline tree node. + std::unordered_map<const MCDecodedPseudoProbeInlineTree *, + std::vector<MCDecodedPseudoProbe>> + InjectedProbeMap; + // Decoded inline records vector. + std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec; + // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; @@ -381,10 +407,6 @@ class MCPseudoProbeDecoder { const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs); - bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, const Uint64Set &GuildFilter, - const Uint64Map &FuncStartAddrs); - // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); @@ -427,6 +449,31 @@ class MCPseudoProbeDecoder { const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { return DummyInlineRoot; } + + void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) { + const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode(); + InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address); + } + + size_t + getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const { + auto It = InjectedProbeMap.find(Parent); + if (It == InjectedProbeMap.end()) + return 0; + return It->second.size(); + } + + auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) { + auto It = InjectedProbeMap.find(Parent); + assert(It != InjectedProbeMap.end()); + return llvm::make_pointer_range(iterator_range(It->second)); + } + +private: + void buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs, + uint32_t &CurChild); }; } // end namespace llvm diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index e9e391c8eadd7..9be93445e14cf 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -290,7 +290,7 @@ void MCDecodedPseudoProbe::getInlineContext( while (Cur->hasInlineSite()) { StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid); ContextStack.emplace_back( - MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); + MCPseudoProbeFrameLocation(FuncName, Cur->ProbeId)); Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -417,9 +417,10 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, return true; } -bool MCPseudoProbeDecoder::buildAddress2ProbeMap( +void MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, + uint32_t &CurChild) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in MCPseudoProbe.h @@ -427,7 +428,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( bool IsTopLevelFunc = Cur == &DummyInlineRoot; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. - Index = Cur->getChildren().size(); + Index = CurChild; } else { // Read inline site for inlinees Index = cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>())); @@ -443,17 +444,21 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) - Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); - Cur->Guid = Guid; + Cur->Children[CurChild] = MCDecodedPseudoProbeInlineTree(Guid, Index, Cur); + Cur = &Cur->Children[CurChild]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; } } + // Advance CurChild for non-skipped top-level functions and unconditionally + // for inlined functions. + CurChild += Cur || !IsTopLevelFunc; // Read number of probes in the current node. uint32_t NodeCount = cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>())); + uint32_t CurrentProbeCount = 0; // Read number of direct inlinees uint32_t ChildrenToProcess = cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>())); @@ -494,19 +499,22 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } if (Cur && !isSentinelProbe(Attr)) { - // Populate Address2ProbesMap - auto &Probes = Address2ProbesMap[Addr]; - Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, - Discriminator, Cur); - Cur->addProbes(&Probes.back()); + PseudoProbeVec.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), + Attr, Discriminator, Cur); + Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back()); + ++CurrentProbeCount; } LastAddr = Addr; } - for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); + if (Cur) { + Cur->Probes = MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount); + InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); + Cur->Children = MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); + } + for (uint32_t I = 0; I < ChildrenToProcess;) { + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); } - return true; } bool MCPseudoProbeDecoder::countRecords(bool IsTopLevelFunc, bool &Discard, @@ -605,13 +613,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( TopLevelFuncs += !Discard; } assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); + + // Allocate top-level function records as children of DummyInlineRoot. + InlineTreeVec.resize(TopLevelFuncs); + DummyInlineRoot.Children = MutableArrayRef(InlineTreeVec); Data = Start; End = Data + Size; uint64_t LastAddr = 0; + uint32_t Child = 0; while (Data < End) buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, - FuncStartAddrs); + FuncStartAddrs, Child); assert(Data == End && "Have unprocessed data in pseudo_probe section"); return true; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 574a9c9f52bf1..fe7d3ffa476eb 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( // DFS down the probe inline tree for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; + InlineSite Location = ChildNode.getInlineSite(); ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext); } ProbeContext.pop_back(); @@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe // is available if (TrackFuncContextSize) { - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { - auto *Frame = Child.second.get(); + for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { + auto *Frame = &Child; StringRef FuncName = ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; TopLevelProbeFrameMap[FuncName] = Frame; `````````` </details> https://github.com/llvm/llvm-project/pull/102789 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits