[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
@@ -596,6 +597,9 @@ class RewriteInstance { NameResolver NR; + // Regex object matching split function names. + const Regex ColdFragment{"(.*)\\.(cold|warm)(\\.[0-9]+)?"}; maksfb wrote: nit: s/ColdFragment/FunctionFragmentTemplate/ https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Detect .warm split functions as cold fragments (PR #93759)
https://github.com/maksfb approved this pull request. LGTM with the nit addressed. https://github.com/llvm/llvm-project/pull/93759 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/maksfb commented: Please refactor new code into a separate function. Add a comment on how the matching is done such that the interface can be understood without reading the code. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -23,6 +26,11 @@ extern cl::opt Verbosity; extern cl::OptionCategory BoltOptCategory; extern cl::opt InferStaleProfile; +cl::opt NameSimilarityFunctionMatchingThreshold( +"name-similarity-function-matching-threshold", +cl::desc("Matches functions using namespace and edit distance."), maksfb wrote: nit: use imperative statement. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -0,0 +1,64 @@ +## Tests function matching in YAMLProfileReader by name similarity. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s maksfb wrote: ```suggestion # RUN: --print-cfg --name-similarity-function-matching-threshold=1 --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s ``` https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -0,0 +1,64 @@ +## Tests function matching in YAMLProfileReader by name similarity. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s + +# CHECK: BOLT-INFO: matched 1 functions with similar names + +#--- main.s +.globl main +.type main, @function +main: + .cfi_startproc +.LBB00: + pushq %rbp + movq%rsp, %rbp + subq$16, %rsp + testq %rax, %rax + js .LBB03 +.LBB01: + jne .LBB04 +.LBB02: + nop +.LBB03: + xorl%eax, %eax + addq$16, %rsp + popq%rbp + retq +.LBB04: + xorl%eax, %eax + addq$16, %rsp + popq%rbp + retq +## For relocations against .text +.LBB05: + call exit maksfb wrote: See comments on the other PR. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -415,11 +423,116 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) matchProfileToFunction(YamlBF, *BF); + // Uses name similarity to match functions that were not matched by name. + uint64_t MatchedWithNameSimilarity = 0; + + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +ItaniumPartialDemangler ItaniumPartialDemangler; + +auto DemangleName = [&](std::string &FunctionName) { + StringRef RestoredName = NameResolver::restore(FunctionName); + return demangle(RestoredName); +}; + +auto DeriveNameSpace = [&](std::string &DemangledName) { + if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) +return std::string(""); + std::vector Buffer(DemangledName.begin(), DemangledName.end()); + size_t BufferSize = Buffer.size(); + char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( + &Buffer[0], &BufferSize); + return NameSpace ? std::string(NameSpace) : std::string(""); +}; + +// Preprocessing YamlBFs to minimize the number of BFs to process. +std::unordered_map> maksfb wrote: Can you use `StringMap` here? https://llvm.org/docs/ProgrammersManual.html#llvm-adt-stringmap-h https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -415,11 +423,116 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) matchProfileToFunction(YamlBF, *BF); + // Uses name similarity to match functions that were not matched by name. + uint64_t MatchedWithNameSimilarity = 0; + + if (opts::NameSimilarityFunctionMatchingThreshold > 0) { +ItaniumPartialDemangler ItaniumPartialDemangler; + +auto DemangleName = [&](std::string &FunctionName) { + StringRef RestoredName = NameResolver::restore(FunctionName); + return demangle(RestoredName); +}; + +auto DeriveNameSpace = [&](std::string &DemangledName) { + if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) +return std::string(""); + std::vector Buffer(DemangledName.begin(), DemangledName.end()); + size_t BufferSize = Buffer.size(); + char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( + &Buffer[0], &BufferSize); + return NameSpace ? std::string(NameSpace) : std::string(""); +}; + +// Preprocessing YamlBFs to minimize the number of BFs to process. +std::unordered_map> + NamespaceToProfiledBFSizes; +NamespaceToProfiledBFSizes.reserve(YamlBP.Functions.size()); +std::vector ProfileBFDemangledNames; +ProfileBFDemangledNames.reserve(YamlBP.Functions.size()); +std::vector ProfiledBFNamespaces; +ProfiledBFNamespaces.reserve(YamlBP.Functions.size()); + +for (auto &YamlBF : YamlBP.Functions) { + std::string YamlBFDemangledName = DemangleName(YamlBF.Name); + ProfileBFDemangledNames.push_back(YamlBFDemangledName); + std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName); + ProfiledBFNamespaces.push_back(YamlBFNamespace); + NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks); +} + +std::unordered_map> maksfb wrote: Same for `StringMap`. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)
@@ -342,6 +350,108 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { return false; } +uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { + uint64_t MatchedWithNameSimilarity = 0; + ItaniumPartialDemangler ItaniumPartialDemangler; + + // Demangle and derive namespace from function name. + auto DemangleName = [&](std::string &FunctionName) { +StringRef RestoredName = NameResolver::restore(FunctionName); +return demangle(RestoredName); + }; + auto DeriveNameSpace = [&](std::string &DemangledName) { +if (ItaniumPartialDemangler.partialDemangle(DemangledName.c_str())) + return std::string(""); +std::vector Buffer(DemangledName.begin(), DemangledName.end()); +size_t BufferSize = Buffer.size(); +char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName( +&Buffer[0], &BufferSize); +return NameSpace ? std::string(NameSpace) : std::string(""); + }; + + // Maps namespaces to associated function block counts and gets profile + // function names and namespaces to minimize the number of BFs to process and + // avoid repeated name demangling/namespace derivision. + StringMap> +NamespaceToProfiledBFSizes; + std::vector ProfileBFDemangledNames; + ProfileBFDemangledNames.reserve(YamlBP.Functions.size()); + std::vector ProfiledBFNamespaces; + ProfiledBFNamespaces.reserve(YamlBP.Functions.size()); + + for (auto &YamlBF : YamlBP.Functions) { +std::string YamlBFDemangledName = DemangleName(YamlBF.Name); +ProfileBFDemangledNames.push_back(YamlBFDemangledName); +std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName); +ProfiledBFNamespaces.push_back(YamlBFNamespace); +NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks); + } + + StringMap> + NamespaceToBFs; maksfb wrote: nit: formatting looks off. https://github.com/llvm/llvm-project/pull/95884 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Refactor function matching (PR #97502)
@@ -456,6 +435,39 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { ++MatchedWithLTOCommonName; } } + return MatchedWithLTOCommonName; +} + +Error YAMLProfileReader::readProfile(BinaryContext &BC) { + if (opts::Verbosity >= 1) { +outs() << "BOLT-INFO: YAML profile with hash: "; +switch (YamlBP.Header.HashFunction) { +case HashFunction::StdHash: + outs() << "std::hash\n"; + break; +case HashFunction::XXH3: + outs() << "xxh3\n"; + break; +} + } + YamlProfileToFunction.resize(YamlBP.Functions.size() + 1); + + // Computes hash for binary functions. + if (opts::MatchProfileWithFunctionHash) { +for (auto &[_, BF] : BC.getBinaryFunctions()) { + BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); +} + } else if (!opts::IgnoreHash) { +for (BinaryFunction *BF : ProfileBFs) { + if (!BF) +continue; + BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction); +} + } + + size_t MatchedWithExactName = matchWithExactName(); + size_t MatchedWithHash = matchWithHash(BC); + size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); maksfb wrote: nit: make them `const`. https://github.com/llvm/llvm-project/pull/97502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
maksfb wrote: Could you please reword the summary and add an example where the new matching technique helps. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Refactor function matching (PR #97502)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/97502 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -56,6 +56,10 @@ class YAMLProfileReader : public ProfileReaderBase { /// is attributed. FunctionSet ProfiledFunctions; + /// Maps profiled function id to name, for function matching with calls as + /// anchors. + DenseMap IdToFunctionName; maksfb wrote: nit: use a type alias. Will make it easier to read specially when the type is used in an argument list. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -155,5 +155,52 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { return HashString; } +/// An even looser hash level relative to $ hashBlockLoose to use with stale +/// profile matching, composed of the names of a block's called functions in +/// lexicographic order. +std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { + // The hash is computed by creating a string of all lexicographically ordered + // called function names. + std::multiset FunctionNames; + for (const MCInst &Instr : BB) { +// Skip non-call instructions. +if (!BC.MIB->isCall(Instr)) + continue; +const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr); +if (!CallSymbol) + continue; +FunctionNames.insert(std::string(CallSymbol->getName())); + } + + std::string HashString; + for (const std::string &FunctionName : FunctionNames) +HashString.append(FunctionName); + + return HashString; +} + +/// The same as the $hashBlockCalls function, but for profiled functions. +std::string +hashBlockCalls(const DenseMap &IdToFunctionName, + const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { + std::multiset FunctionNames; + for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) { +auto It = IdToFunctionName.find(CallSiteInfo.DestId); +if (It == IdToFunctionName.end()) + continue; +StringRef Name = It->second; +const size_t Pos = Name.find("(*"); maksfb wrote: Let's expand `NameResolver` interface to restore the name of the symbol and use it here. https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with calls as anchors (PR #96596)
@@ -155,5 +155,52 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { return HashString; } +/// An even looser hash level relative to $ hashBlockLoose to use with stale +/// profile matching, composed of the names of a block's called functions in +/// lexicographic order. +std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) { + // The hash is computed by creating a string of all lexicographically ordered + // called function names. + std::multiset FunctionNames; maksfb wrote: Consider an alternative container. https://llvm.org/docs/ProgrammersManual.html#other-set-like-container-options https://github.com/llvm/llvm-project/pull/96596 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)
https://github.com/maksfb approved this pull request. Thanks for the back port. https://github.com/llvm/llvm-project/pull/102741 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
@@ -78,32 +97,15 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { if (!ICSP) continue; for (const IndirectCallProfile &CSP : ICSP.get()) { - StringRef TargetName = ""; - CSI.DestId = 0; // designated for unknown functions - CSI.EntryDiscriminator = 0; - if (CSP.Symbol) { -const BinaryFunction *Callee = BC.getFunctionForSymbol(CSP.Symbol); -if (Callee) { - CSI.DestId = Callee->getFunctionNumber(); - TargetName = Callee->getOneName(); -} - } + const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol); CSI.Count = CSP.Count; CSI.Mispreds = CSP.Mispreds; - CSTargets.emplace_back(TargetName, CSI); + if (CSI.Count && Callee) maksfb wrote: Previously, we would record data for unknown destinations, but now we are ignoring those. Not sure if it matters in practice. But if we are going to ignore them, the code that could be simplified/refactored. https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/maksfb approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -123,6 +123,9 @@ class BoltAddressTranslation { std::unordered_map> getBFBranches(uint64_t FuncOutputAddress) const; + /// Returns a secondary entry point id for a given function and offset. maksfb wrote: "... function at a given \p Address..." https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) maksfb wrote: Can we get a condition when `getFunctionForSymbol(Symbol)` return a function, but the symbol has no set value? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { maksfb wrote: ```suggestion auto setBATSecondaryEntry = [&](const BinaryFunction &Callee) { ``` https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; + }; + if (Symbol) { uint64_t EntryID = 0; if (const BinaryFunction *const Callee = BC.getFunctionForSymbol(Symbol, &EntryID)) { CSI.DestId = Callee->getFunctionNumber(); CSI.EntryDiscriminator = EntryID; + if (BAT && BAT->isBATFunction(Callee->getAddress())) +setBATSecondaryEntry(Callee); maksfb wrote: What do you think about modifying `BinaryContext::getFunctionForSymbol()` in the presence of BAT? We might move BAT into `BinaryContext` (from `RewriteInstance`) for that. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; maksfb wrote: Can we modify `getSecondaryEntryPointId()` interface so that there's no need to "+1" here? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Support multi-way split functions (PR #87123)
@@ -197,8 +197,10 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, ? SecondaryEntryPointsMap[Address].size() : 0; if (Cold) { - size_t HotIndex = - std::distance(ColdPartSource.begin(), ColdPartSource.find(Address)); + // `Maps` is keyed by output addresses. + auto HotEntryIt = Maps.find(ColdPartSource[Address]); + assert(HotEntryIt != Maps.end()); maksfb wrote: Does this happen when BAT maps are incomplete? Since BAT is part of the input, should this be an error instead? https://github.com/llvm/llvm-project/pull/87123 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Support multi-way split functions (PR #87123)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87123 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT interfaces in YAMLProfileWriter::convert (PR #86219)
@@ -27,25 +28,55 @@ namespace bolt { /// Set CallSiteInfo destination fields from \p Symbol and return a target /// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +static const BinaryFunction * +setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + auto setBATSecondaryEntry = [&](const BinaryFunction *const Callee) { +// The symbol could be a secondary entry in a cold fragment. +ErrorOr SymbolValue = BC.getSymbolValue(*Symbol); +if (SymbolValue.getError()) + return; + +// Containing function, not necessarily the same as symbol value. +const uint64_t CalleeAddress = Callee->getAddress(); +const uint32_t OutputOffset = SymbolValue.get() - CalleeAddress; + +const uint64_t ParentAddress = BAT->fetchParentAddress(CalleeAddress); +const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress; + +if (const BinaryFunction *ParentBF = +BC.getBinaryFunctionAtAddress(HotAddress)) + CSI.DestId = ParentBF->getFunctionNumber(); + +const uint32_t InputOffset = +BAT->translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false); + +if (!InputOffset) + return; + +CSI.EntryDiscriminator = +BAT->getSecondaryEntryPointId(HotAddress, InputOffset) + 1; + }; + if (Symbol) { uint64_t EntryID = 0; if (const BinaryFunction *const Callee = BC.getFunctionForSymbol(Symbol, &EntryID)) { CSI.DestId = Callee->getFunctionNumber(); CSI.EntryDiscriminator = EntryID; + if (BAT && BAT->isBATFunction(Callee->getAddress())) +setBATSecondaryEntry(Callee); maksfb wrote: How about moving the adjustment logic into BAT then? https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
https://github.com/maksfb approved this pull request. Looks good with nits addressed. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
@@ -123,6 +124,12 @@ class BoltAddressTranslation { std::unordered_map> getBFBranches(uint64_t FuncOutputAddress) const; + /// For a given \p Symbol in the output binary, returns a corresponding pair + /// of parent BinaryFunction and secondary entry point in it. maksfb wrote: Update comment with `Offset` info. https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
@@ -161,6 +164,10 @@ class BoltAddressTranslation { /// Map a function to its secondary entry points vector std::unordered_map> SecondaryEntryPointsMap; + /// Translates a given \p Symbol into a BinaryFunction and + /// Returns a secondary entry point id for a given \p Address and \p Offset. maksfb wrote: ```suggestion /// Return a secondary entry point ID for a function located at \p Address and \p Offset within that function. ``` https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use BAT for YAML profile call target information (PR #86219)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/86219 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Emit empty FDE for injected functions (PR #87967)
https://github.com/maksfb commented: Let's make `hasCFI()` return true for injected functions. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use BAT to register fragments (PR #87968)
https://github.com/maksfb commented: Is information we emit in the symbol table insufficient to establish the fragment relationship? https://github.com/llvm/llvm-project/pull/87968 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: > > Let's make `hasCFI()` return true for injected functions. > > Given that this change increases the size of eh_frame section, should we make > it dependent on `enable-bat`, i.e. when we expect to feed the binary back to > BOLT? > > Because otherwise this "fix" is a pure size regression. Creating a proper FDE entry is the right thing to do regardless of BAT. How large is the regression? https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: > > Creating a proper FDE entry is the right thing to do regardless of BAT. How > > large is the regression? > > The largest I've seen is 34M->39M (HHVM instrumentation). Did you check if we patch every single function in he original `.text`? How many functions are there? https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
maksfb wrote: Okay. That explains the increase. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Emit empty FDE for injected functions (PR #87967)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87967 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
@@ -245,14 +245,12 @@ class DataAggregator : public DataReader { /// disassembled BinaryFunctions BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address) const; + /// Perform BAT translation for a given \p Func and return the parent + /// BinaryFunction or nullptr. + BinaryFunction *getParentFunction(const BinaryFunction &Func) const; maksfb wrote: If the function is BAT-specific, it should be clear from the name. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
@@ -59,10 +59,29 @@ # RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \ # RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat +# Prepare pre-aggregated profile using %t.bat maksfb wrote: Nit: use double `#`s. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -38,10 +38,63 @@ # RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \ # RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat +# Prepare pre-aggregated profile using %t.bat maksfb wrote: Use double `#`s. https://github.com/llvm/llvm-project/pull/87743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -2341,86 +2341,62 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy &BlockMap = BAT->getBBHashMap(FuncAddress); + YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); - auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB, -uint64_t SuccOffset, unsigned SuccDataIdx) { + for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks)) +YamlBB.Index = Idx; + + for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) +YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash(); + + auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) { const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx); yaml::bolt::SuccessorInfo SI; SI.Index = BlockMap.getBBIndex(SuccOffset); SI.Count = BI.Branches; SI.Mispreds = BI.Mispreds; -YamlBB.Successors.emplace_back(SI); +return SI; }; - std::unordered_map> BFBranches = - BAT->getBFBranches(FuncAddress); - - auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB, - uint64_t Offset) { -// Iterate over BRANCHENTRY records in the current block -for (uint32_t BranchOffset : BFBranches[Offset]) { - if (!Branches.InterIndex.contains(BranchOffset)) -continue; - for (const auto &[CallToLoc, CallToIdx] : - Branches.InterIndex.at(BranchOffset)) { -const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx); -yaml::bolt::CallSiteInfo YamlCSI; -YamlCSI.DestId = 0; // designated for unknown functions -YamlCSI.EntryDiscriminator = 0; -YamlCSI.Count = BI.Branches; -YamlCSI.Mispreds = BI.Mispreds; -YamlCSI.Offset = BranchOffset - Offset; -BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name); -if (!CallTargetBD) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -uint64_t CallTargetAddress = CallTargetBD->getAddress(); -BinaryFunction *CallTargetBF = -BC.getBinaryFunctionAtAddress(CallTargetAddress); -if (!CallTargetBF) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -// Calls between hot and cold fragments must be handled in -// fixupBATProfile. -assert(CallTargetBF != BF && "invalid CallTargetBF"); -YamlCSI.DestId = CallTargetBF->getFunctionNumber(); -if (CallToLoc.Offset) { - if (BAT->isBATFunction(CallTargetAddress)) { -LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary " - "entry point in BAT function " - << CallToLoc.Name << '\n'); - } else if (const BinaryBasicBlock *CallTargetBB = - CallTargetBF->getBasicBlockAtOffset( - CallToLoc.Offset)) { -// Only record true call information, ignoring returns (normally -// won't have a target basic block) and jumps to the landing -// pads (not an entry point). -if (CallTargetBB->isEntryPoint()) { - YamlCSI.EntryDiscriminator = - CallTargetBF->getEntryIDForSymbol( - CallTargetBB->getLabel()); -} - } -} -YamlBB.CallSites.emplace_back(YamlCSI); - } -} + auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx, + uint32_t Offset) { +const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx); +yaml::bolt::CallSiteInfo CSI; +CSI.DestId = 0; // designated for unknown functions +CSI.EntryDiscriminator = 0; +CSI.Count = BI.Branches; +CSI.Mispreds = BI.Mispreds; +CSI.Offset = Offset; +if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name)) + YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, + CallToLoc.Offset); +return CSI; }; for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -yaml::bolt::BinaryBasicBlockProfile YamlBB; if (!BlockMap.isInputBlock(FromOffset)) continue; -YamlBB.Index = BlockMap.getBBIndex(FromOffset); -YamlBB.Hash = BlockMap.getBBHash(FromOffset); +unsigned Index = BlockMap.getBBIndex(FromOffset); +yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index]; for (const aut
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
@@ -2341,86 +2341,62 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy &BlockMap = BAT->getBBHashMap(FuncAddress); + YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); - auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB, -uint64_t SuccOffset, unsigned SuccDataIdx) { + for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks)) +YamlBB.Index = Idx; + + for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) +YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash(); + + auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) { const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx); yaml::bolt::SuccessorInfo SI; SI.Index = BlockMap.getBBIndex(SuccOffset); SI.Count = BI.Branches; SI.Mispreds = BI.Mispreds; -YamlBB.Successors.emplace_back(SI); +return SI; }; - std::unordered_map> BFBranches = - BAT->getBFBranches(FuncAddress); - - auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB, - uint64_t Offset) { -// Iterate over BRANCHENTRY records in the current block -for (uint32_t BranchOffset : BFBranches[Offset]) { - if (!Branches.InterIndex.contains(BranchOffset)) -continue; - for (const auto &[CallToLoc, CallToIdx] : - Branches.InterIndex.at(BranchOffset)) { -const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx); -yaml::bolt::CallSiteInfo YamlCSI; -YamlCSI.DestId = 0; // designated for unknown functions -YamlCSI.EntryDiscriminator = 0; -YamlCSI.Count = BI.Branches; -YamlCSI.Mispreds = BI.Mispreds; -YamlCSI.Offset = BranchOffset - Offset; -BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name); -if (!CallTargetBD) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -uint64_t CallTargetAddress = CallTargetBD->getAddress(); -BinaryFunction *CallTargetBF = -BC.getBinaryFunctionAtAddress(CallTargetAddress); -if (!CallTargetBF) { - YamlBB.CallSites.emplace_back(YamlCSI); - continue; -} -// Calls between hot and cold fragments must be handled in -// fixupBATProfile. -assert(CallTargetBF != BF && "invalid CallTargetBF"); -YamlCSI.DestId = CallTargetBF->getFunctionNumber(); -if (CallToLoc.Offset) { - if (BAT->isBATFunction(CallTargetAddress)) { -LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary " - "entry point in BAT function " - << CallToLoc.Name << '\n'); - } else if (const BinaryBasicBlock *CallTargetBB = - CallTargetBF->getBasicBlockAtOffset( - CallToLoc.Offset)) { -// Only record true call information, ignoring returns (normally -// won't have a target basic block) and jumps to the landing -// pads (not an entry point). -if (CallTargetBB->isEntryPoint()) { - YamlCSI.EntryDiscriminator = - CallTargetBF->getEntryIDForSymbol( - CallTargetBB->getLabel()); -} - } -} -YamlBB.CallSites.emplace_back(YamlCSI); - } -} + auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx, + uint32_t Offset) { +const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx); +yaml::bolt::CallSiteInfo CSI; +CSI.DestId = 0; // designated for unknown functions +CSI.EntryDiscriminator = 0; +CSI.Count = BI.Branches; +CSI.Mispreds = BI.Mispreds; +CSI.Offset = Offset; +if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name)) + YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, + CallToLoc.Offset); +return CSI; }; for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -yaml::bolt::BinaryBasicBlockProfile YamlBB; if (!BlockMap.isInputBlock(FromOffset)) continue; -YamlBB.Index = BlockMap.getBBIndex(FromOffset); -YamlBB.Hash = BlockMap.getBBHash(FromOffset); +unsigned Index = BlockMap.getBBIndex(FromOffset); +yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index]; for (const aut
[llvm-branch-commits] [llvm] [BOLT] Cover all call sites in writeBATYAML (PR #87743)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BAT] Fix handling of split functions (PR #87569)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87569 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use offset deduplication for cold fragments (PR #87853)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/87853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Use heuristic for matching split local functions (PR #90424)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/90424 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -48,10 +48,9 @@ static cl::opt InputFilename(cl::Positional, cl::Required, cl::cat(BatDumpCategory)); -static cl::list Translate("translate", -cl::desc("translate addresses using BAT"), -cl::value_desc("addr"), -cl::cat(BatDumpCategory)); +static cl::list +Translate("translate", cl::desc("translate addresses using BAT"), + cl::value_desc("addr[:is_from]"), cl::cat(BatDumpCategory)); maksfb wrote: ```suggestion cl::value_desc("addr[:is_src]"), cl::cat(BatDumpCategory)); ``` https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -24,6 +24,32 @@ READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries +# Check handling of a branch not in BAT (added by BOLT) +RUN: FileCheck --input-file %p/Inputs/blarge_new_bat.preagg.txt --check-prefix PREAGG-CHECK %s +# The branch +PREAGG-CHECK: B 80007b 80004c 483 1 +RUN: llvm-objdump %t.out -d --start-address=0x80007b --stop-address=0x80007d \ +RUN: | FileCheck %s --check-prefix OBJDUMP-CHECK +OBJDUMP-CHECK: jmp +# Confirming it's not in BAT +RUN: llvm-bat-dump %t.out --dump-all | FileCheck %s --check-prefix BAT-DUMP-CHECK +BAT-DUMP-CHECK: Function Address: 0x800040, hash: 0x99e67ed32a203023 +# Containing basic block +BAT-DUMP-CHECK: 0x34 -> 0x32 hash: 0x6c36179f229b0032 +# Branch entry just above +BAT-DUMP-CHECK-NEXT: 0x37 -> 0x35 (branch) +# No entry for that offset +BAT-DUMP-NOT: 0x3b -> maksfb wrote: Is this line intended to be used? Is it needed? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -478,18 +478,34 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress, return Offset; const MapTy &Map = Iter->second; + if (IsBranchSrc) { +// Try exact lookup first +auto KeyVal = Map.find(Offset); +if (KeyVal != Map.end() && KeyVal->second & BRANCHENTRY) + return KeyVal->second >> 1; + } auto KeyVal = Map.upper_bound(Offset); if (KeyVal == Map.begin()) return Offset; --KeyVal; const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit - // Branch source addresses are translated to the first instruction of the - // source BB to avoid accounting for modifications BOLT may have made in the - // BB regarding deletion/addition of instructions. - if (IsBranchSrc) -return Val; + if (IsBranchSrc) { maksfb wrote: Can we handle the exact match case (from above) here as well to simplify the code? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][BAT] Fix translate for branches added by BOLT (PR #90811)
@@ -492,6 +486,10 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress, const uint32_t Val = KeyVal->second >> 1; // dropping BRANCHENTRY bit if (IsBranchSrc) { +// Branch entry is found in BAT +if (KeyVal->first == Offset && KeyVal->second & BRANCHENTRY) maksfb wrote: What would be the case where we need to check for the second condition (`KeyVal->second & BRANCHENTRY`)? Can we drop it? https://github.com/llvm/llvm-project/pull/90811 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
https://github.com/maksfb approved this pull request. Please address the nits. Otherwise - good to go. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction, } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); maksfb wrote: nit: add message. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction, } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); + assert(Offset < MaxSize && "invalid offset"); maksfb wrote: nit: capitalize the message. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
@@ -1167,6 +1167,21 @@ void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction, } } +std::optional +BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { + assert(CurrentState == State::Empty); + assert(Offset < MaxSize && "invalid offset"); + ErrorOr> FunctionData = getData(); + assert(FunctionData && "cannot get function as data"); maksfb wrote: nit: ditto. https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore returns in DataAggregator (PR #90807)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/90807 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2378,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [&BlockMap](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +assert(BlockIt != BlockMap.begin()); +--BlockIt; +return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); + }; + for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -if (!BlockMap.isInputBlock(FromOffset)) - continue; -const unsigned Index = BlockMap.getBBIndex(FromOffset); +const auto &[_, Index] = getBlock(FromOffset); maksfb wrote: Do we expect `getBlock()` to always return a good value? There's no chance for malformed input to trigger the assertion above? https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2378,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [&BlockMap](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +assert(BlockIt != BlockMap.begin()); +--BlockIt; +return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); + }; + for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -if (!BlockMap.isInputBlock(FromOffset)) - continue; -const unsigned Index = BlockMap.getBBIndex(FromOffset); +const auto &[_, Index] = getBlock(FromOffset); maksfb wrote: Even though we generate BAT in BOLT, when we view the invocation of BOLT on a binary with embedded BAT, such input should be considered an external and potentially malformed data. In this case, assertions will not provide adequate enough protection since we can build BOLT without them. https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
@@ -2378,21 +2379,27 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, return CSI; }; + // Lookup containing basic block offset and index + auto getBlock = [&BlockMap](uint32_t Offset) { +auto BlockIt = BlockMap.upper_bound(Offset); +if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) { + errs() << "BOLT-ERROR: Invalid BAT section"; maksfb wrote: ```suggestion errs() << "BOLT-ERROR: invalid BAT section\n"; ``` https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Map branch source address to the containing basic block in BAT YAML (PR #91273)
https://github.com/maksfb approved this pull request. Please see the nit for the error message (caps and new line). Otherwise LGTM. https://github.com/llvm/llvm-project/pull/91273 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
https://github.com/maksfb approved this pull request. Looks good on my end. https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Use aggregated FuncBranchData in writeBATYAML (PR #91289)
@@ -2386,25 +2362,26 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, return std::pair(BlockIt->first, BlockIt->second.getBBIndex()); }; - for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) { -const auto &[_, Index] = getBlock(FromOffset); -yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index]; -for (const auto &[SuccOffset, SuccDataIdx] : SuccKV) - if (BlockMap.isInputBlock(SuccOffset)) -YamlBB.Successors.emplace_back( -getSuccessorInfo(SuccOffset, SuccDataIdx)); - } - for (const auto &[FromOffset, CallTo] : Branches.InterIndex) { -const auto &[BlockOffset, BlockIndex] = getBlock(FromOffset); -yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex]; -const uint32_t Offset = FromOffset - BlockOffset; -for (const auto &[CallToLoc, CallToIdx] : CallTo) - YamlBB.CallSites.emplace_back( - getCallSiteInfo(CallToLoc, CallToIdx, Offset)); -llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo &A, -yaml::bolt::CallSiteInfo &B) { - return A.Offset < B.Offset; -}); + for (const llvm::bolt::BranchInfo &BI : Branches.Data) { maksfb wrote: nit: no need for `llvm::bolt::`. https://github.com/llvm/llvm-project/pull/91289 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore hot markers as function references in updateELFSymbolTable (PR #92713)
@@ -4788,13 +4788,25 @@ void RewriteInstance::updateELFSymbolTable( if (!IsDynSym && shouldStrip(Symbol)) continue; +Expected SymbolName = Symbol.getName(StringSection); maksfb wrote: Can we move the code that checks for special symbols here and skip function matching for them all together? Otherwise we are checking for the markers in several locations unnecessarily. https://github.com/llvm/llvm-project/pull/92713 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Ignore special symbols as function aliases in updateELFSymbolTable (PR #92713)
https://github.com/maksfb approved this pull request. LGTM. Thanks. https://github.com/llvm/llvm-project/pull/92713 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Support perf2bolt-N in the driver (PR #111072)
https://github.com/maksfb approved this pull request. LGTM. Please add the description of the problem this PR fixes and link any related issue(s). https://github.com/llvm/llvm-project/pull/111072 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Speedup BAT::writeMaps (PR #112061)
https://github.com/maksfb approved this pull request. LGTM. I'd mark this PR as "NFCI". https://github.com/llvm/llvm-project/pull/112061 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Allow builtin_unreachable to be at MaxSize address (PR #111771)
@@ -1365,8 +1365,9 @@ Error BinaryFunction::disassemble() { if (containsAddress(TargetAddress)) { TargetSymbol = getOrCreateLocalLabel(TargetAddress); } else { -if (TargetAddress == getAddress() + getSize() && -TargetAddress < getAddress() + getMaxSize() && +if (BC.isELF() && !BC.getBinaryDataAtAddress(TargetAddress) && maksfb wrote: What happens when there's a data object at `TargetAddress`? https://github.com/llvm/llvm-project/pull/111771 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CMake] Preserve clang.pre-bolt (PR #109351)
https://github.com/maksfb approved this pull request. Thanks! PS. Let's drop ICF from the flags. https://github.com/llvm/llvm-project/pull/109351 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [BOLT, test] Link against a shared object to test PLT (#125625) (PR #126351)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/126351 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Fix counts aggregation in merge-fdata (PR #119652)
https://github.com/maksfb approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/119652 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
@@ -1689,6 +1689,8 @@ bool BinaryFunction::scanExternalRefs() { // Create relocation for every fixup. for (const MCFixup &Fixup : Fixups) { std::optional Rel = BC.MIB->createRelocation(Fixup, *BC.MAB); + // Can be skipped under the right circumstances. maksfb wrote: Maybe we can say here "can be skipped in case of the overflow"? https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
@@ -174,11 +175,31 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), SectionFileOffset + Patch.Offset); + uint64_t SkippedPendingRelocations = 0; for (Relocation &Reloc : PendingRelocations) { uint64_t Value = Reloc.Addend; if (Reloc.Symbol) Value += Resolver(Reloc.Symbol); +// Safely skip any optional pending relocation that cannot be encoded. +if (Reloc.isOptional() && +!Relocation::canEncodeValue(Reloc.Type, Value, +SectionAddress + Reloc.Offset)) { + + // A successful run of 'scanExternalRefs' means that all pending + // relocations are flushed. Otherwise, PatchEntries should run. + if (!opts::ForcePatch) { +BC.errs() +<< "BOLT-ERROR: Cannot fully run scanExternalRefs as pending " + "relocation for symbol " maksfb wrote: Let's rephrase the message in a more user-oriented way, i.e. drop `scanExternalRefs()`. https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/maksfb commented: Looks good! Minor nits. https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Make DataflowAnalysis::getStateBefore() const (NFC) (PR #133308)
https://github.com/maksfb approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/133308 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Define AArch64 jump table types (PR #132109)
https://github.com/maksfb commented: I expect that eventually we will get RISC-V support for jump tables. In this context, I would prefer to keep the format of the jump table itself separate from the underlying architecture. I.e. we can use JTs with an absolute 64-bit addressing on any platform with a proper compiler support. Having one type, e.g. `JTT_ABS64`, sounds more convenient. And we can always lookup the target architecture if needed, but again it should be orthogonal to the format itself. What could be different is endianness, but we can handle it during serialization. Also, in most places we use the number of bits for naming "things". Let's stick to this scheme. https://github.com/llvm/llvm-project/pull/132109 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)
https://github.com/maksfb approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/138798 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Drop perf2bolt cold samples diagnostic (PR #139337)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/139337 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Print heatmap from perf2bolt (PR #139194)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/139194 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)
https://github.com/maksfb approved this pull request. The code LGTM. For practical applications, we sometimes use 3-way function splitting resulting in code being broken into more than two partitions. In such case, the most interesting metrics and score should be attached to non-cold partitions. With "hot text" enabled, the corresponding address range of such partition could be identified by [__hot_start, __hot_end). https://github.com/llvm/llvm-project/pull/139193 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
https://github.com/maksfb approved this pull request. As an NFC this change looks good to me. I've left a comments for a follow-up. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
@@ -1682,48 +1648,66 @@ void Analysis::runOnFunction(BinaryFunction &BF, } } +// Compute the instruction address for printing (may be slow). +static uint64_t getAddress(const MCInstReference &Inst) { + const BinaryFunction *BF = Inst.getFunction(); + + if (Inst.hasCFG()) { +const BinaryBasicBlock *BB = Inst.getBasicBlock(); + +auto It = static_cast(&Inst.getMCInst()); +unsigned IndexInBB = std::distance(BB->begin(), It); + +// FIXME: this assumes all instructions are 4 bytes in size. This is true maksfb wrote: We have `BinaryContext::computeCodeSize()`. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
@@ -0,0 +1,168 @@ +//===- bolt/Core/MCInstUtils.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef BOLT_CORE_MCINSTUTILS_H +#define BOLT_CORE_MCINSTUTILS_H + +#include "bolt/Core/BinaryBasicBlock.h" + +#include +#include +#include + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +/// MCInstReference represents a reference to a constant MCInst as stored either +/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock +/// (after a CFG is created). +class MCInstReference { + using nocfg_const_iterator = std::map::const_iterator; + + // Two cases are possible: + // * functions with CFG reconstructed - a function stores a collection of + // basic blocks, each basic block stores a contiguous vector of MCInst + // * functions without CFG - there are no basic blocks created, + // the instructions are directly stored in std::map in BinaryFunction + // + // In both cases, the direct parent of MCInst is stored together with an + // iterator pointing to the instruction. + + // Helper struct: CFG is available, the direct parent is a basic block, + // iterator's type is `MCInst *`. + struct RefInBB { +RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst) +: BB(BB), It(Inst) {} +RefInBB(const RefInBB &Other) = default; +RefInBB &operator=(const RefInBB &Other) = default; + +const BinaryBasicBlock *BB; +BinaryBasicBlock::const_iterator It; + +bool operator<(const RefInBB &Other) const { + return std::tie(BB, It) < std::tie(Other.BB, Other.It); +} + +bool operator==(const RefInBB &Other) const { + return BB == Other.BB && It == Other.It; +} + }; + + // Helper struct: CFG is *not* available, the direct parent is a function, + // iterator's type is std::map::iterator (the mapped value + // is an instruction's offset). + struct RefInBF { +RefInBF(const BinaryFunction *BF, nocfg_const_iterator It) +: BF(BF), It(It) {} +RefInBF(const RefInBF &Other) = default; +RefInBF &operator=(const RefInBF &Other) = default; + +const BinaryFunction *BF; +nocfg_const_iterator It; + +bool operator<(const RefInBF &Other) const { maksfb wrote: Similar concern regarding the `BinaryFunction *` order. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
@@ -0,0 +1,168 @@ +//===- bolt/Core/MCInstUtils.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef BOLT_CORE_MCINSTUTILS_H +#define BOLT_CORE_MCINSTUTILS_H + +#include "bolt/Core/BinaryBasicBlock.h" + +#include +#include +#include + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +/// MCInstReference represents a reference to a constant MCInst as stored either +/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock +/// (after a CFG is created). +class MCInstReference { + using nocfg_const_iterator = std::map::const_iterator; + + // Two cases are possible: + // * functions with CFG reconstructed - a function stores a collection of + // basic blocks, each basic block stores a contiguous vector of MCInst + // * functions without CFG - there are no basic blocks created, + // the instructions are directly stored in std::map in BinaryFunction + // + // In both cases, the direct parent of MCInst is stored together with an + // iterator pointing to the instruction. + + // Helper struct: CFG is available, the direct parent is a basic block, + // iterator's type is `MCInst *`. + struct RefInBB { +RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst) +: BB(BB), It(Inst) {} +RefInBB(const RefInBB &Other) = default; +RefInBB &operator=(const RefInBB &Other) = default; + +const BinaryBasicBlock *BB; +BinaryBasicBlock::const_iterator It; + +bool operator<(const RefInBB &Other) const { maksfb wrote: What are expected uses for this comparison? I'm concerned about non-deterministic order of `BinaryBasicBlock *`. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
https://github.com/maksfb edited https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
@@ -0,0 +1,168 @@ +//===- bolt/Core/MCInstUtils.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef BOLT_CORE_MCINSTUTILS_H +#define BOLT_CORE_MCINSTUTILS_H + +#include "bolt/Core/BinaryBasicBlock.h" + maksfb wrote: nit: drop empty line. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Factor out MCInstReference from gadget scanner (NFC) (PR #138655)
@@ -0,0 +1,57 @@ +//===- bolt/Passes/MCInstUtils.cpp ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "bolt/Core/MCInstUtils.h" + maksfb wrote: nit: empty line. https://github.com/llvm/llvm-project/pull/138655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Support pre-aggregated basic sample profile (PR #140196)
@@ -370,33 +370,46 @@ class DataAggregator : public DataReader { /// memory. /// /// File format syntax: - /// {B|F|f|T} [:] [:] [] - ///[] + /// E maksfb wrote: Can you specify multiple events per file? https://github.com/llvm/llvm-project/pull/140196 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Support pre-aggregated basic sample profile (PR #140196)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/140196 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Zero initialize pre-aggregated counters (PR #142698)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/142698 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFCI] Skip validation in parseLBRSample (PR #143288)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/143288 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Sort EntryData (PR #143308)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/143308 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT][NFC] Move LBREntry from DataReader to DataAggregator (PR #143287)
https://github.com/maksfb approved this pull request. https://github.com/llvm/llvm-project/pull/143287 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits