https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/138366
>From 03060849dc81f83ec48f05995ac8fd6df846c25b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <pe...@pcc.me.uk> Date: Fri, 2 May 2025 16:57:28 -0700 Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- lld/ELF/Arch/AArch64.cpp | 58 +++++++++++++++++ lld/ELF/Arch/TargetImpl.h | 87 +++++++++++++++++++++++++ lld/ELF/Arch/X86_64.cpp | 54 +++++++++++++++ lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 2 + lld/ELF/Options.td | 4 ++ lld/ELF/Relocations.cpp | 8 ++- lld/ELF/Target.h | 1 + lld/docs/ld.lld.1 | 8 ++- lld/test/ELF/aarch64-branch-to-branch.s | 58 +++++++++++++++++ lld/test/ELF/x86-64-branch-to-branch.s | 58 +++++++++++++++++ 11 files changed, 335 insertions(+), 4 deletions(-) create mode 100644 lld/ELF/Arch/TargetImpl.h create mode 100644 lld/test/ELF/aarch64-branch-to-branch.s create mode 100644 lld/test/ELF/x86-64-branch-to-branch.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9538dd4a70bae..f3a24bd8a9184 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "TargetImpl.h" #include "lld/Common/ErrorHandler.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Endian.h" @@ -83,6 +84,7 @@ class AArch64 : public TargetInfo { uint64_t val) const override; RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; + void applyBranchToBranchOpt() const override; private: void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; @@ -975,6 +977,62 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } +static std::optional<uint64_t> getControlTransferAddend(InputSection &is, + Relocation &r) { + // Identify a control transfer relocation for the branch-to-branch + // optimization. A "control transfer relocation" means a B or BL + // target but it also includes relative vtable relocations for example. + // + // We require the relocation type to be JUMP26, CALL26 or PLT32. With a + // relocation type of PLT32 the value may be assumed to be used for branching + // directly to the symbol and the addend is only used to produce the relocated + // value (hence the effective addend is always 0). This is because if a PLT is + // needed the addend will be added to the address of the PLT, and it doesn't + // make sense to branch into the middle of a PLT. For example, relative vtable + // relocations use PLT32 and 0 or a positive value as the addend but still are + // used to branch to the symbol. + // + // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero + // addend is that we are branching to symbol+addend so that becomes the + // effective addend. + if (r.type == R_AARCH64_PLT32) + return 0; + if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26) + return r.addend; + return std::nullopt; +} + +static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is, + uint64_t offset) { + auto *i = std::lower_bound( + is.relocations.begin(), is.relocations.end(), offset, + [](Relocation &r, uint64_t offset) { return r.offset < offset; }); + if (i != is.relocations.end() && i->offset == offset && + i->type == R_AARCH64_JUMP26) { + return {i, i->addend}; + } + return {nullptr, 0}; +} + +static void mergeControlTransferRelocations(Relocation &r1, + const Relocation &r2) { + r1.expr = r2.expr; + r1.sym = r2.sym; + // With PLT32 we must respect the original addend as that affects the value's + // interpretation. With the other relocation types the original addend is + // irrelevant because it referred to an offset within the original target + // section so we overwrite it. + if (r1.type == R_AARCH64_PLT32) + r1.addend += r2.addend; + else + r1.addend = r2.addend; +} + +void AArch64::applyBranchToBranchOpt() const { + applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend, + mergeControlTransferRelocations); +} + // AArch64 may use security features in variant PLT sequences. These are: // Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target // Indicator (BTI) introduced in armv8.5-a. The additional instructions used diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h new file mode 100644 index 0000000000000..bb10749516953 --- /dev/null +++ b/lld/ELF/Arch/TargetImpl.h @@ -0,0 +1,87 @@ +//===- TargetImpl.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ARCH_TARGETIMPL_H +#define LLD_ELF_ARCH_TARGETIMPL_H + +#include "InputFiles.h" +#include "InputSection.h" +#include "Relocations.h" +#include "Symbols.h" +#include "llvm/BinaryFormat/ELF.h" + +namespace lld { +namespace elf { + +// getControlTransferAddend: If this relocation is used for control transfer +// instructions (e.g. branch, branch-link or call) or code references (e.g. +// virtual function pointers) and indicates an address-insignificant reference, +// return the effective addend for the relocation, otherwise return +// std::nullopt. The effective addend for a relocation is the addend that is +// used to determine its branch destination. +// +// getBranchInfo: If a control transfer relocation referring to is+offset +// directly transfers control to a relocated branch instruction in the specified +// section, return the relocation for the branch target as well as its effective +// addend (see above). Otherwise return {nullptr, 0}. +// +// mergeControlTransferRelocations: Given r1, a relocation for which +// getControlTransferAddend() returned a value, and r2, a relocation returned by +// getBranchInfo(), modify r1 so that it branches directly to the target of r2. +template <typename GetBranchInfo, typename GetControlTransferAddend, + typename MergeControlTransferRelocations> +inline void applyBranchToBranchOptImpl( + Ctx &ctx, GetBranchInfo getBranchInfo, + GetControlTransferAddend getControlTransferAddend, + MergeControlTransferRelocations mergeControlTransferRelocations) { + // Needs to run serially because it writes to the relocations array as well as + // reading relocations of other sections. + for (ELFFileBase *f : ctx.objectFiles) { + auto getRelocBranchInfo = + [&ctx, &getBranchInfo](Relocation &r, + uint64_t addend) -> std::pair<Relocation *, uint64_t> { + auto *target = dyn_cast_or_null<Defined>(r.sym); + // We don't allow preemptible symbols (may go somewhere else), + // absolute symbols (runtime behavior unknown), non-executable memory + // (ditto) or non-regular sections (no section data). + if (!target || target->isPreemptible || !target->section || + !(target->section->flags & llvm::ELF::SHF_EXECINSTR) || + target->section->kind() != SectionBase::Regular) + return {nullptr, 0}; + return getBranchInfo(*cast<InputSection>(target->section), + target->value + addend); + }; + for (InputSectionBase *s : f->getSections()) { + if (!s) + continue; + for (Relocation &r : s->relocations) { + if (std::optional<uint64_t> addend = + getControlTransferAddend(*cast<InputSection>(s), + r)) { + std::pair<Relocation *, uint64_t> targetAndAddend = + getRelocBranchInfo(r, *addend); + if (targetAndAddend.first) { + while (1) { + std::pair<Relocation *, uint64_t> nextTargetAndAddend = + getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second); + if (!nextTargetAndAddend.first) + break; + targetAndAddend = nextTargetAndAddend; + } + mergeControlTransferRelocations(r, *targetAndAddend.first); + } + } + } + } + } +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 0c4fd00cab65c..0a4578b0aca4b 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "TargetImpl.h" #include "lld/Common/ErrorHandler.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Endian.h" @@ -50,6 +51,7 @@ class X86_64 : public TargetInfo { bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, InputSection *nextIS) const override; bool relaxOnce(int pass) const override; + void applyBranchToBranchOpt() const override; private: void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; @@ -1162,6 +1164,58 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } +static std::optional<uint64_t> getControlTransferAddend(InputSection &is, + Relocation &r) { + // Identify a control transfer relocation for the branch-to-branch + // optimization. A "control transfer relocation" usually means a CALL or JMP + // target but it also includes relative vtable relocations for example. + // + // We require the relocation type to be PLT32. With a relocation type of PLT32 + // the value may be assumed to be used for branching directly to the symbol + // and the addend is only used to produce the relocated value (hence the + // effective addend is always 0). This is because if a PLT is needed the + // addend will be added to the address of the PLT, and it doesn't make sense + // to branch into the middle of a PLT. For example, relative vtable + // relocations use PLT32 and 0 or a positive value as the addend but still are + // used to branch to the symbol. + if (r.type == R_X86_64_PLT32) + return 0; + return std::nullopt; +} + +static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is, + uint64_t offset) { + auto content = is.contentMaybeDecompress(); + if (content.size() > offset && content[offset] == 0xe9) { // JMP immediate + auto *i = std::lower_bound( + is.relocations.begin(), is.relocations.end(), offset + 1, + [](Relocation &r, uint64_t offset) { return r.offset < offset; }); + // Unlike with getControlTransferAddend() it is valid to accept a PC32 + // relocation here because we know that this is actually a JMP and not some + // other reference, so the interpretation is that we add 4 to the addend and + // use that as the effective addend. + if (i != is.relocations.end() && i->offset == offset + 1 && + (i->type == R_X86_64_PC32 || i->type == R_X86_64_PLT32)) { + return {i, i->addend + 4}; + } + } + return {nullptr, 0}; +} + +static void mergeControlTransferRelocations(Relocation &r1, + const Relocation &r2) { + r1.expr = r2.expr; + r1.sym = r2.sym; + // The +4 is here to compensate for r2.addend which will likely be -4, + // but may also be addend-4 in case of a PC32 branch to symbol+addend. + r1.addend += r2.addend + 4; +} + +void X86_64::applyBranchToBranchOpt() const { + applyBranchToBranchOptImpl(ctx, getBranchInfo, getControlTransferAddend, + mergeControlTransferRelocations); +} + // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT // entries containing endbr64 instructions. A PLT entry will be split into two // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f0e9592d85dd6..b7449b9d13cf5 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -276,6 +276,7 @@ struct Config { bool bpFunctionOrderForCompression = false; bool bpDataOrderForCompression = false; bool bpVerboseSectionOrderer = false; + bool branchToBranch = false; bool checkSections; bool checkDynamicRelocs; std::optional<llvm::DebugCompressionType> compressDebugSections; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9d36071e1532f..e79372957e408 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1589,6 +1589,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.zWxneeded = hasZOption(args, "wxneeded"); setUnresolvedSymbolPolicy(ctx, args); ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no"; + ctx.arg.branchToBranch = args.hasFlag( + OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2); if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) { if (arg->getOption().matches(OPT_eb)) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 76d28096f82c8..40fc0d2c8c64e 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -59,6 +59,10 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">, MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">; def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">; +defm branch_to_branch: B<"branch-to-branch", + "Enable branch-to-branch optimization (default at -O2)", + "Disable branch-to-branch optimization (default at -O0 and -O1)">; + defm check_sections: B<"check-sections", "Check section addresses for overlaps (default)", "Do not check section addresses for overlaps">; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 277acb26987bc..457fd19da5493 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1671,9 +1671,10 @@ void RelocationScanner::scan(Relocs<RelTy> rels) { } // Sort relocations by offset for more efficient searching for - // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. + // R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization. if (ctx.arg.emachine == EM_RISCV || - (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc")) + (ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") || + ctx.arg.branchToBranch) llvm::stable_sort(sec->relocs(), [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; @@ -1964,6 +1965,9 @@ void elf::postScanRelocations(Ctx &ctx) { for (ELFFileBase *file : ctx.objectFiles) for (Symbol *sym : file->getLocalSymbols()) fn(*sym); + + if (ctx.arg.branchToBranch) + ctx.target->applyBranchToBranchOpt(); } static bool mergeCmp(const InputSection *a, const InputSection *b) { diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index fd1e5d33c438a..6dd20b2f0cbaa 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -101,6 +101,7 @@ class TargetInfo { virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} + virtual void applyBranchToBranchOpt() const {} virtual ~TargetInfo(); diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 7b2650637cb10..d7b987ded784d 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -93,6 +93,10 @@ Bind default visibility defined STB_GLOBAL function symbols locally for .Fl shared. .It Fl --be8 Write a Big Endian ELF File using BE8 format(AArch32 only) +.It Fl -branch-to-branch +Enable the branch-to-branch optimizations: a branch whose target is +another branch instruction is rewritten to point to the latter branch +target (AArch64 and X86_64 only). Enabled by default at -O2. .It Fl -build-id Ns = Ns Ar value Generate a build ID note. .Ar value @@ -414,7 +418,7 @@ If not specified, .Dv a.out is used as a default. .It Fl O Ns Ar value -Optimize output file size. +Optimize output file. .Ar value may be: .Pp @@ -424,7 +428,7 @@ Disable string merging. .It Cm 1 Enable string merging. .It Cm 2 -Enable string tail merging. +Enable string tail merging and branch-to-branch optimization. .El .Pp .Fl O Ns Cm 1 diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s new file mode 100644 index 0000000000000..3a3ae04ac0538 --- /dev/null +++ b/lld/test/ELF/aarch64-branch-to-branch.s @@ -0,0 +1,58 @@ +# REQUIRES: aarch64 + +## Test that the branch-to-branch optimization follows the links +## from f1 -> f2 -> f3 and updates all references to point to f3. + +# RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t --branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: ld.lld %t.o -o %t -O2 +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s + +## Test that branch-to-branch is disabled by default. + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s + +## Test that branch-to-branch is disabled for preemptible symbols. + +# RUN: ld.lld %t.o -o %t --branch-to-branch -shared +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s + +.section .rodata.vtable,"a" +.globl vtable +vtable: +# B2B: Contents of section .rodata: +# B2B-NEXT: [[VF:[0-9a-f]{8}]] +.4byte f1@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f2@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f3@PLT - vtable + +.section .text._start,"ax" +.globl _start +_start: +# B2B: bl {{.*}} <f3> +# NOB2B: bl {{.*}} <f1{{.*}}> +bl f1 +# B2B: b {{.*}} <f3> +# NOB2B: b {{.*}} <f2{{.*}}> +b f2 + +.section .text.f1,"ax" +.globl f1 +f1: +b f2 + +.section .text.f2,"ax" +.globl f2 +f2: +b f3 + +.section .text.f3,"ax" +.globl f3 +f3: +ret diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s new file mode 100644 index 0000000000000..b9c9abe2eb752 --- /dev/null +++ b/lld/test/ELF/x86-64-branch-to-branch.s @@ -0,0 +1,58 @@ +# REQUIRES: x86 + +## Test that the branch-to-branch optimization follows the links +## from f1 -> f2 -> f3 and updates all references to point to f3. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t --branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: ld.lld %t.o -o %t -O2 +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s + +## Test that branch-to-branch is disabled by default. + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s + +## Test that branch-to-branch is disabled for preemptible symbols. + +# RUN: ld.lld %t.o -o %t --branch-to-branch -shared +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s + +.section .rodata.vtable,"a" +.globl vtable +vtable: +# B2B: Contents of section .rodata: +# B2B-NEXT: [[VF:[0-9a-f]{8}]] +.4byte f1@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f2@PLT - vtable +# B2B-SAME: [[VF]] +.4byte f3@PLT - vtable + +.section .text._start,"ax" +.globl _start +_start: +# B2B: jmp {{.*}} <f3> +# NOB2B: jmp {{.*}} <f1{{.*}}> +jmp f1 +# B2B: jmp {{.*}} <f3> +# NOB2B: jmp {{.*}} <f2{{.*}}> +jmp f2 + +.section .text.f1,"ax" +.globl f1 +f1: +jmp f2 + +.section .text.f2,"ax" +.globl f2 +f2: +jmp f3 + +.section .text.f3,"ax" +.globl f3 +f3: +ret >From bcebed6353f63c980cd4d05ba9726f88193deca4 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <pe...@pcc.me.uk> Date: Fri, 2 May 2025 17:06:37 -0700 Subject: [PATCH 2/5] Formatting, fix warning Created using spr 1.3.6-beta.1 --- lld/ELF/Arch/TargetImpl.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h index bb10749516953..757c0e2c0c51b 100644 --- a/lld/ELF/Arch/TargetImpl.h +++ b/lld/ELF/Arch/TargetImpl.h @@ -43,8 +43,8 @@ inline void applyBranchToBranchOptImpl( // reading relocations of other sections. for (ELFFileBase *f : ctx.objectFiles) { auto getRelocBranchInfo = - [&ctx, &getBranchInfo](Relocation &r, - uint64_t addend) -> std::pair<Relocation *, uint64_t> { + [&getBranchInfo](Relocation &r, + uint64_t addend) -> std::pair<Relocation *, uint64_t> { auto *target = dyn_cast_or_null<Defined>(r.sym); // We don't allow preemptible symbols (may go somewhere else), // absolute symbols (runtime behavior unknown), non-executable memory @@ -54,21 +54,21 @@ inline void applyBranchToBranchOptImpl( target->section->kind() != SectionBase::Regular) return {nullptr, 0}; return getBranchInfo(*cast<InputSection>(target->section), - target->value + addend); + target->value + addend); }; for (InputSectionBase *s : f->getSections()) { if (!s) continue; for (Relocation &r : s->relocations) { if (std::optional<uint64_t> addend = - getControlTransferAddend(*cast<InputSection>(s), - r)) { + getControlTransferAddend(*cast<InputSection>(s), r)) { std::pair<Relocation *, uint64_t> targetAndAddend = getRelocBranchInfo(r, *addend); if (targetAndAddend.first) { while (1) { std::pair<Relocation *, uint64_t> nextTargetAndAddend = - getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second); + getRelocBranchInfo(*targetAndAddend.first, + targetAndAddend.second); if (!nextTargetAndAddend.first) break; targetAndAddend = nextTargetAndAddend; >From 815550728987ac7338db486a11008bb8ed7a9eba Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <pe...@pcc.me.uk> Date: Mon, 5 May 2025 15:02:20 -0700 Subject: [PATCH 3/5] Address comments Created using spr 1.3.6-beta.1 --- lld/ELF/Options.td | 2 +- lld/test/ELF/aarch64-branch-to-branch.s | 13 ++++++++----- lld/test/ELF/x86-64-branch-to-branch.s | 21 ++++++++++++--------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 40fc0d2c8c64e..2ce9e07dc6f2b 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -59,7 +59,7 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">, MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">; def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">; -defm branch_to_branch: B<"branch-to-branch", +defm branch_to_branch: BB<"branch-to-branch", "Enable branch-to-branch optimization (default at -O2)", "Disable branch-to-branch optimization (default at -O0 and -O1)">; diff --git a/lld/test/ELF/aarch64-branch-to-branch.s b/lld/test/ELF/aarch64-branch-to-branch.s index 3a3ae04ac0538..06c899fd4e6b7 100644 --- a/lld/test/ELF/aarch64-branch-to-branch.s +++ b/lld/test/ELF/aarch64-branch-to-branch.s @@ -5,21 +5,21 @@ # RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o # RUN: ld.lld %t.o -o %t --branch-to-branch -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s # RUN: ld.lld %t.o -o %t -O2 -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s ## Test that branch-to-branch is disabled by default. # RUN: ld.lld %t.o -o %t -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s # RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s ## Test that branch-to-branch is disabled for preemptible symbols. # RUN: ld.lld %t.o -o %t --branch-to-branch -shared -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s .section .rodata.vtable,"a" .globl vtable @@ -34,6 +34,7 @@ vtable: .section .text._start,"ax" .globl _start +# CHECK: <_start>: _start: # B2B: bl {{.*}} <f3> # NOB2B: bl {{.*}} <f1{{.*}}> @@ -49,7 +50,9 @@ b f2 .section .text.f2,"ax" .globl f2 +# CHECK: <f2>: f2: +# CHECK-NEXT: b {{.*}} <f3{{.*}}> b f3 .section .text.f3,"ax" diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s index b9c9abe2eb752..0c9e903438f8e 100644 --- a/lld/test/ELF/x86-64-branch-to-branch.s +++ b/lld/test/ELF/x86-64-branch-to-branch.s @@ -5,21 +5,21 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: ld.lld %t.o -o %t --branch-to-branch -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s # RUN: ld.lld %t.o -o %t -O2 -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,B2B %s ## Test that branch-to-branch is disabled by default. # RUN: ld.lld %t.o -o %t -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s # RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s ## Test that branch-to-branch is disabled for preemptible symbols. # RUN: ld.lld %t.o -o %t --branch-to-branch -shared -# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s +# RUN: llvm-objdump -d -s %t | FileCheck --check-prefixes=CHECK,NOB2B %s .section .rodata.vtable,"a" .globl vtable @@ -34,12 +34,13 @@ vtable: .section .text._start,"ax" .globl _start +# CHECK: <_start>: _start: -# B2B: jmp {{.*}} <f3> -# NOB2B: jmp {{.*}} <f1{{.*}}> +# B2B-NEXT: jmp {{.*}} <f3> +# NOB2B-NEXT: jmp {{.*}} <f1{{.*}}> jmp f1 -# B2B: jmp {{.*}} <f3> -# NOB2B: jmp {{.*}} <f2{{.*}}> +# B2B-NEXT: jmp {{.*}} <f3> +# NOB2B-NEXT: jmp {{.*}} <f2{{.*}}> jmp f2 .section .text.f1,"ax" @@ -49,7 +50,9 @@ jmp f2 .section .text.f2,"ax" .globl f2 +# CHECK: <f2>: f2: +# CHECK-NEXT: jmp {{.*}} <f3{{.*}}> jmp f3 .section .text.f3,"ax" >From c2d4697dff86e36b307d8d489c34f4e7f156acab Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <pe...@pcc.me.uk> Date: Thu, 8 May 2025 21:44:30 -0700 Subject: [PATCH 4/5] Fix bugs found during testing Created using spr 1.3.6-beta.1 --- lld/ELF/Arch/TargetImpl.h | 11 ++++++++--- lld/ELF/Arch/X86_64.cpp | 22 ++++++++++++++++++---- lld/test/ELF/x86-64-branch-to-branch.s | 12 ++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/lld/ELF/Arch/TargetImpl.h b/lld/ELF/Arch/TargetImpl.h index 757c0e2c0c51b..fffb6ca927319 100644 --- a/lld/ELF/Arch/TargetImpl.h +++ b/lld/ELF/Arch/TargetImpl.h @@ -46,10 +46,11 @@ inline void applyBranchToBranchOptImpl( [&getBranchInfo](Relocation &r, uint64_t addend) -> std::pair<Relocation *, uint64_t> { auto *target = dyn_cast_or_null<Defined>(r.sym); - // We don't allow preemptible symbols (may go somewhere else), + // We don't allow preemptible symbols or ifuncs (may go somewhere else), // absolute symbols (runtime behavior unknown), non-executable memory // (ditto) or non-regular sections (no section data). - if (!target || target->isPreemptible || !target->section || + if (!target || target->isPreemptible || target->isGnuIFunc() || + !target->section || !(target->section->flags & llvm::ELF::SHF_EXECINSTR) || target->section->kind() != SectionBase::Regular) return {nullptr, 0}; @@ -65,7 +66,11 @@ inline void applyBranchToBranchOptImpl( std::pair<Relocation *, uint64_t> targetAndAddend = getRelocBranchInfo(r, *addend); if (targetAndAddend.first) { - while (1) { + // Avoid getting stuck in an infinite loop if we encounter a branch + // that (possibly indirectly) branches to itself. It is unlikely + // that more than 5 iterations will ever be needed in practice. + size_t iterations = 5; + while (iterations--) { std::pair<Relocation *, uint64_t> nextTargetAndAddend = getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second); diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 0a4578b0aca4b..fb1410c1b84e8 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -1178,8 +1178,15 @@ static std::optional<uint64_t> getControlTransferAddend(InputSection &is, // to branch into the middle of a PLT. For example, relative vtable // relocations use PLT32 and 0 or a positive value as the addend but still are // used to branch to the symbol. - if (r.type == R_X86_64_PLT32) + // + // STT_SECTION symbols are a special case on x86 because the LLVM assembler + // uses them for branches to local symbols which are assembled as referring to + // the section symbol with the addend equal to the symbol value - 4. + if (r.type == R_X86_64_PLT32) { + if (r.sym->isSection()) + return r.addend + 4; return 0; + } return std::nullopt; } @@ -1204,11 +1211,18 @@ static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is, static void mergeControlTransferRelocations(Relocation &r1, const Relocation &r2) { - r1.expr = r2.expr; - r1.sym = r2.sym; + // The isSection() check handles the STT_SECTION case described above. + // In that case the original addend is irrelevant because it referred to an + // offset within the original target section so we overwrite it. + // // The +4 is here to compensate for r2.addend which will likely be -4, // but may also be addend-4 in case of a PC32 branch to symbol+addend. - r1.addend += r2.addend + 4; + if (r1.sym->isSection()) + r1.addend = r2.addend; + else + r1.addend += r2.addend + 4; + r1.expr = r2.expr; + r1.sym = r2.sym; } void X86_64::applyBranchToBranchOpt() const { diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s index 0c9e903438f8e..52da61ef8f5af 100644 --- a/lld/test/ELF/x86-64-branch-to-branch.s +++ b/lld/test/ELF/x86-64-branch-to-branch.s @@ -42,6 +42,12 @@ jmp f1 # B2B-NEXT: jmp {{.*}} <f3> # NOB2B-NEXT: jmp {{.*}} <f2{{.*}}> jmp f2 +# This will assemble to a relocation pointing to an STT_SECTION for .text.f4 +# with an addend, which looks similar to the relative vtable cases above but +# requires different handling of the addend so that we don't think this is +# branching to the `jmp f3` at the start of the target section. +# CHECK-NEXT: jmp {{.*}} <f4{{.*}}> +jmp f4 .section .text.f1,"ax" .globl f1 @@ -58,4 +64,10 @@ jmp f3 .section .text.f3,"ax" .globl f3 f3: +# Test that a self-branch doesn't trigger an infinite loop. +jmp f3 + +.section .text.f4,"ax" +jmp f3 +f4: ret >From 8dc8490ba0b5422184f0b8163d7c5a5f02606fc1 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne <pe...@pcc.me.uk> Date: Thu, 8 May 2025 21:59:05 -0700 Subject: [PATCH 5/5] Add test for ifunc case Created using spr 1.3.6-beta.1 --- lld/test/ELF/x86-64-branch-to-branch.s | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lld/test/ELF/x86-64-branch-to-branch.s b/lld/test/ELF/x86-64-branch-to-branch.s index 52da61ef8f5af..c127d49ba9833 100644 --- a/lld/test/ELF/x86-64-branch-to-branch.s +++ b/lld/test/ELF/x86-64-branch-to-branch.s @@ -48,6 +48,8 @@ jmp f2 # branching to the `jmp f3` at the start of the target section. # CHECK-NEXT: jmp {{.*}} <f4{{.*}}> jmp f4 +# B2B-NEXT: jmp 0x[[IPLT:[0-9a-f]*]] +jmp f5 .section .text.f1,"ax" .globl f1 @@ -71,3 +73,12 @@ jmp f3 jmp f3 f4: ret + +.section .text.f5,"ax" +.type f5, @gnu_indirect_function +.globl f5 +f5: +jmp f3 + +# B2B: <.iplt>: +# B2B-NEXT: [[IPLT]]: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits