llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Indirect Call Promotion used to consider individual symbols in the call profile. However, with ICF enabled, some symbols might get folded into one function. Indirect Call Promotion should accumulate their counts to check against the threshold and generate checks accordingly, one per unique function. Test Plan: added bolt/test/X86/icf-aware-icp.s --- Full diff: https://github.com/llvm/llvm-project/pull/120493.diff 3 Files Affected: - (modified) bolt/include/bolt/Passes/IndirectCallPromotion.h (+11) - (modified) bolt/lib/Passes/IndirectCallPromotion.cpp (+16-2) - (added) bolt/test/X86/icf-aware-icp.s (+56) ``````````diff diff --git a/bolt/include/bolt/Passes/IndirectCallPromotion.h b/bolt/include/bolt/Passes/IndirectCallPromotion.h index 8ec160b867cf8ce..6f5f3532fc82bc5 100644 --- a/bolt/include/bolt/Passes/IndirectCallPromotion.h +++ b/bolt/include/bolt/Passes/IndirectCallPromotion.h @@ -108,6 +108,9 @@ class IndirectCallPromotion : public BinaryFunctionPass { Location() {} explicit Location(MCSymbol *Sym) : Sym(Sym) {} explicit Location(uint64_t Addr) : Addr(Addr) {} + bool operator==(const Location &O) { + return Sym == O.Sym || Addr == O.Addr; + } }; struct Callsite { @@ -123,6 +126,14 @@ class IndirectCallPromotion : public BinaryFunctionPass { uint64_t Branches, uint64_t JTIndex) : From(From), To(To), Mispreds(Mispreds), Branches(Branches), JTIndices(1, JTIndex) {} + // Increment the current Callsite counts with another, used for merging + // targets. 
+ Callsite &operator+=(const Callsite &O) { + assert(From == O.From); + Mispreds += O.Mispreds; + Branches += O.Branches; + return *this; + } }; std::unordered_set<const BinaryFunction *> Modified; diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 2b5a591f4c7a22f..aad11adb9697294 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -317,11 +317,25 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>( Inst, "CallProfile"); if (ICSP) { + // Deduplicate aliases by using function + entry id as a key type. + using FuncEntryTy = std::pair<const BinaryFunction *, uint64_t>; + std::map<FuncEntryTy, Callsite> FuncToCallsite; for (const IndirectCallProfile &CSP : ICSP.get()) { Callsite Site(BF, CSP); - if (Site.isValid()) - Targets.emplace_back(std::move(Site)); + if (!Site.isValid()) + continue; + + uint64_t EntryDesc = 0; + const BinaryFunction *Func = + BC.getFunctionForSymbol(CSP.Symbol, &EntryDesc); + + auto [It, Success] = + FuncToCallsite.try_emplace(std::make_pair(Func, EntryDesc), Site); + if (!Success) + It->second += Site; } + for (Callsite Site : llvm::make_second_range(FuncToCallsite)) + Targets.emplace_back(std::move(Site)); } } diff --git a/bolt/test/X86/icf-aware-icp.s b/bolt/test/X86/icf-aware-icp.s new file mode 100644 index 000000000000000..ca4dba58806c3cb --- /dev/null +++ b/bolt/test/X86/icf-aware-icp.s @@ -0,0 +1,56 @@ +## Check that ICP recognizes functions folded by ICF and inserts a single check + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: ld.lld -q -o %t %t.o + +# Without ICF, ICP should not be performed: +# RUN: llvm-bolt %t -o %t.bolt1 --icp=calls --icp-calls-topn=1 --print-icp \ +# RUN: --icp-calls-total-percent-threshold=90 \ +# RUN: --data %t.fdata 
| FileCheck %s --check-prefix=CHECK-NO-ICF + +# CHECK-NO-ICF: ICP percentage of indirect callsites that are optimized = 0.0% + +# With ICF, ICP should be performed: +# RUN: llvm-bolt %t -o %t.bolt1 --icp=calls --icp-calls-topn=1 --print-icp \ +# RUN: --icp-calls-total-percent-threshold=90 \ +# RUN: --data %t.fdata --icf | FileCheck %s --check-prefix=CHECK-ICF + +# CHECK-ICF: ICP percentage of indirect callsites that are optimized = 100.0% +# CHECK-ICF: Binary Function "main" after indirect-call-promotion +# CHECK-ICF: callq bar + + .globl bar +bar: + imull $0x64, %edi, %eax + addl $0x2a, %eax + retq +.size bar, .-bar + + .globl foo +foo: + imull $0x64, %edi, %eax + addl $0x2a, %eax + retq +.size foo, .-foo + + .globl main +main: + pushq %rax + movslq %edi, %rax + leaq funcs(%rip), %rcx + xorl %edi, %edi +LBB00_br: + callq *(%rcx,%rax,8) +# FDATA: 1 main #LBB00_br# 1 foo 0 0 2 +# FDATA: 1 main #LBB00_br# 1 bar 0 0 2 + popq %rcx + retq +.size main, .-main + + .section .rodata + .globl funcs +funcs: + .quad foo + .quad bar `````````` </details> https://github.com/llvm/llvm-project/pull/120493 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits