[llvm-branch-commits] [llvm] [LoongArch] Avoid scheduling relaxable code sequence and attach relax relocs (PR #121330)
@@ -443,6 +443,89 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI, break; } + const auto &STI = MF.getSubtarget(); + if (STI.hasFeature(LoongArch::FeatureRelax)) { +// When linker relaxation enabled, the following instruction patterns are +// prohibited from being reordered: +// +// * pcalau12i $a0, %pc_hi20(s) +// addi.w/d $a0, $a0, %pc_lo12(s) +// +// * pcalau12i $a0, %got_pc_hi20(s) +// ld.w/d $a0, $a0, %got_pc_lo12(s) +// +// * pcalau12i $a0, %ie_pc_hi20(s) +// ld.w/d $a0, $a0, %ie_pc_lo12(s) ylzsx wrote: I think tls ie can be scheduled. https://github.com/llvm/llvm-project/pull/121330 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. (PR #121120)
https://github.com/ylzsx closed https://github.com/llvm/llvm-project/pull/121120 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. (PR #121120)
https://github.com/ylzsx converted_to_draft https://github.com/llvm/llvm-project/pull/121120 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence. (PR #123677)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123677 Relax TLSDESC code sequence. Original code sequence: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) When the sequence cannot be converted to LE/IE, it is relaxed to: * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future patch. >From 56c24f9746ef42c449a4d1d5caf10f7cd1dd7d81 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 31 Dec 2024 15:51:43 +0800 Subject: [PATCH 1/6] Relax TLSDESC code sequence. Original code sequence: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) When the sequence cannot be converted to LE/IE, it is relaxed to: * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future patch. 
--- lld/ELF/Arch/LoongArch.cpp | 44 +- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 2d6d86d2ca63b2..a6db15bbf1efd4 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -782,7 +782,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, (rHi20.type == R_LARCH_TLS_GD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12) || (rHi20.type == R_LARCH_TLS_LD_PC_HI20 && - rLo12.type == R_LARCH_GOT_PC_LO12))) + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 && + rLo12.type == R_LARCH_TLS_DESC_PC_LO12))) return; // GOT references to absolute symbols can't be relaxed to use pcaddi in @@ -804,6 +806,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, symBase = rHi20.sym->getVA(ctx); else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); + else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC) +symBase = ctx.in.got->getTlsDescAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -837,6 +841,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2; else sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); @@ -903,6 +909,33 @@ static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax TLSDESC code sequence. In LoongArch, the conversion of TLSDESC GD/LD to +// LE/IE is closely tied to relaxation, similar to how GCC handles it. 
(Due to +// the lack of an efficient way for handling conversions in the extreme code +// model and the difficulty in determining whether the extreme code model is +// being used in handleTlsRelocation, this approach may seem like a workaround). +// Consequently, the resulting code sequence depends on whether the conversion +// to LE/IE is performed. +// +// Original code sequence: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// +// Cannot convert to LE/IE, but relax: +// * pcaddi $a0, %desc_pcrel_20(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// +// FIXME: Implement TLSDESC GD/LD to LE/IE. +static void relaxTlsdesc(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &rHi20, Relocation &rLo12, + uint32_t &remove) { + if (ctx.arg.shared && rHi20.type == R_LARCH_TLS_DESC_PC_HI20) +return relaxPCHi20Lo12(ctx, sec, i, loc, rHi20, rLo12, remove); +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -959,6 +992,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (relaxable(relocs, i)) relaxTlsLe(ctx, sec, i, loc, r, remove); break; +case R_LARCH_TLS_DESC_PC_HI20: + if (isPairRelaxable(relocs, i)) +relaxTlsdesc(ctx, sec, i, loc, r, relocs[i + 2], remove); + break; } // For all anchors whose of
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE. (PR #123715)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123715 Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, and use the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In the normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) -- * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) Convert to IE: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) Convert to LE: * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0, otherwise nop * ori $a0, $a0, %le_lo12(sym_le) For simplicity, in both tlsdescToIe and tlsdescToLe we always convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) can then be handled in the same way. FIXME: When relaxation is enabled, the redundant NOPs can be removed. This will be implemented in a future patch. Note: TLSDESC code sequences of any form must not appear interleaved in the normal, medium or extreme code model; compilers do not generate such code and lld does not support it. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. ``` >From dff3031fdb2ca3755b73e3b81e56f8008a409470 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 14:29:17 +0800 Subject: [PATCH 1/4] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. Support TLSDESC to initial-exec or local-exec optimizations. 
Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while use existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) -- * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the preceding instructions to NOPs, due to both forms of code sequence (corresponding to relocation combinations: R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) have same process. FIXME: When relaxation enables, redundant NOPs can be removed. It will be implemented in a future patch. Note: All forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model, which compilers do not generate and lld is unsupported. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. 
``` --- lld/ELF/Arch/LoongArch.cpp | 146 - lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp| 38 ++ lld/ELF/Relocations.h | 1 + 4 files changed, 169 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ef25e741901d93..37614c3e9615d6 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,11 +39,14 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; private: void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -61,6 +64,7 @@ enum Op { LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, + PCALAU12I = 0x1a00, LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, @@ -72,6 +76,7 @@ enum Reg { R_ZERO = 0, R_RA = 1, R_TP = 2, + R_A0 = 4, R_T0 = 12, R_T1 = 13,
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE. (PR #123715)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123715 >From dff3031fdb2ca3755b73e3b81e56f8008a409470 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 14:29:17 +0800 Subject: [PATCH 1/6] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while use existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) -- * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the preceding instructions to NOPs, due to both forms of code sequence (corresponding to relocation combinations: R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) have same process. FIXME: When relaxation enables, redundant NOPs can be removed. It will be implemented in a future patch. Note: All forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model, which compilers do not generate and lld is unsupported. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. 
``` --- lld/ELF/Arch/LoongArch.cpp | 146 - lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp| 38 ++ lld/ELF/Relocations.h | 1 + 4 files changed, 169 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ef25e741901d93..37614c3e9615d6 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,11 +39,14 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; private: void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -61,6 +64,7 @@ enum Op { LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, + PCALAU12I = 0x1a00, LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, @@ -72,6 +76,7 @@ enum Reg { R_ZERO = 0, R_RA = 1, R_TP = 2, + R_A0 = 4, R_T0 = 12, R_T1 = 13, R_T2 = 14, @@ -962,7 +967,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (isPairRelaxable(relocs, i)) + if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && + r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -1047,6 +1053,103 @@ void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, } } +// Convert TLSDESC GD/LD to IE. 
+// In normal or medium code model, there are two forms of code sequences: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// -- +// * pcaddi $a0, %desc_pcrel_20(a) +// * load $ra, $a0, %desc_ld(a) +// * jirl $ra, $ra, %desc_call(a) +// +// The code sequence obtained is as follows: +// * pcalau12i $a0, %ie_pc_hi20(sym_ie) +// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) +// +// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the +// preceding instructions to NOPs, due to both forms of code sequence +// (corresponding to relocation combinations: +// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and +// R_LARCH_TLS_DESC_PCREL20_S2) have same process. +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel, +uint64_t val) const { + switch (rel.type) { +
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization. (PR #123743)
ylzsx wrote: I have submitted all the patches related to relaxation in lld for LoongArch. Below is a list for peer review: * users/ylzsx/r-got-to-pcrel │ https://github.com/llvm/llvm-project/pull/123743 │ * users/ylzsx/r-tlsdesc-to-iele-relax │ https://github.com/llvm/llvm-project/pull/123730 │ * users/ylzsx/r-tlsdesc-to-iele-norelax │ https://github.com/llvm/llvm-project/pull/123715 │ * users/ylzsx/r-tls-ie-to-le-relax │ https://github.com/llvm/llvm-project/pull/123702 │ * users/ylzsx/r-tls-ie-to-le-norelax │ https://github.com/llvm/llvm-project/pull/123680 │ * users/ylzsx/r-tlsdesc-noconversion │ https://github.com/llvm/llvm-project/pull/123677 │ * users/ylzsx/r-tls-noie │ https://github.com/llvm/llvm-project/pull/123600 │ * users/ylzsx/r-call36 │ https://github.com/llvm/llvm-project/pull/123576 │ * users/ylzsx/r-pchi20lo12 │ https://github.com/llvm/llvm-project/pull/123566 │ * main https://github.com/llvm/llvm-project/pull/123743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion. (PR #123702)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123702 Complement https://github.com/llvm/llvm-project/pull/123680. When relaxation enable, remove redundant NOPs. >From 83d8b7e00b16c97b11f3c19ed45bb70eeae95428 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 11:15:56 +0800 Subject: [PATCH 1/3] Support relaxation during IE to LE conversion. Complement https://. When relaxation enable, remove redundant NOPs. --- lld/ELF/Arch/LoongArch.cpp | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index dc98dbec872c0c..ef25e741901d93 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -975,6 +975,11 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (relaxable(relocs, i)) relaxTlsLe(ctx, sec, i, loc, r, remove); break; +case R_LARCH_TLS_IE_PC_HI20: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) +remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -1049,7 +1054,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { secAddr += s->outSecOff; else if (auto *ehIn = dyn_cast(&sec)) secAddr += ehIn->getParent()->outSecOff; - bool isExtreme = false; + bool isExtreme = false, isRelax = false; const MutableArrayRef relocs = sec.relocs(); for (size_t i = 0, size = relocs.size(); i != size; ++i) { Relocation &rel = relocs[i]; @@ -1077,8 +1082,12 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); relocateNoSym(loc, rel.type, val); - } else + } else { +isRelax = relaxable(relocs, i); +if (isRelax && rel.type == R_LARCH_TLS_IE_PC_HI20 && isUInt<12>(val)) + continue; tlsIeToLe(loc, rel, val); + } continue; default: break; >From 6b79432dcb9172906deab445055d8197fdeed425 Mon Sep 17 00:00:00 2001 From: yangzhaoxin 
Date: Fri, 3 Jan 2025 11:20:10 +0800 Subject: [PATCH 2/3] Modify loongarch-relax-tls-ie.s when relaxation is enabled. --- lld/test/ELF/loongarch-relax-tls-ie.s | 28 --- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s index f5375ae3a3b400..be76263f2978e0 100644 --- a/lld/test/ELF/loongarch-relax-tls-ie.s +++ b/lld/test/ELF/loongarch-relax-tls-ie.s @@ -3,11 +3,10 @@ # RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o -## FIXME: IE relaxation has not yet been implemented. -## --relax/--no-relax has the same result. Also check --emit-relocs. +## Also check --emit-relocs. # RUN: ld.lld --emit-relocs %t.o -o %t # RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s -# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s +# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER %s # RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax # RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s @@ -42,6 +41,29 @@ # LE-NEXT:add.d $a2, $a2, $tp # LE-NEXT:add.d $a3, $a3, $tp +# LER: 20158: ori $a0, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:add.d $a0, $a0, $tp +# LER-NEXT: 20160: lu12i.w $a1, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT:ori $a1, $a1, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a1, $a1, $tp +# LER-NEXT: 2016c: lu12i.w $a3, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:ori $a2, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT:ori $a3, $a3, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a2, $a2, $tp +# LER-NEXT:add.d $a3, $a3, $tp + la.tls.ie $a0, a# relax add.d $a0, $a0, $tp >From 
9d99de87229b84f65f392dc5eb73bf1aef16fa65 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 17 Jan 2025 09:00:44 +0800 Subject: [PATCH 3/3] Modify test. Add --relax option. --- lld/test/ELF/loongarch-relax-tls-ie.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s index be76263f2978e0..2c95a342251f20 100644 --- a/lld/test/ELF/loongarch-relax-tls-ie.s +++ b/lld/test/ELF/loongarch-relax-tls-ie.s
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123680 >From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/5] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations can appear interleaved, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ec09437404eddc..dc98dbec872c0c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + //
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx converted_to_draft https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx ready_for_review https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123680 Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0, $a0, %ie_pc_lo12(sym) FIXME: When relaxation is enabled, the redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, the original code sequence with relocations may be interleaved with other instructions, because the converted code sequence calculates the absolute offset. However, in the extreme code model, the first four instructions with relocations must appear consecutively so that the code model can be identified. >From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/4] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0, $a0, %ie_pc_lo12(sym) FIXME: When relaxation is enabled, the redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, the original code sequence with relocations may be interleaved with other instructions, because the converted code sequence calculates the absolute offset. However, in the extreme code model, the first four instructions with relocations must appear consecutively so that the code model can be identified. 
--- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ec09437404eddc..dc98dbec872c0c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. +// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. 
+void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. In this case, the relocs are as follows: +// +/
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD. (PR #123600)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123600 In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. >From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 19:29:32 +0800 Subject: [PATCH 1/5] Relax TLS LE/GD/LD. 
In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- lld/ELF/Arch/LoongArch.cpp | 68 +++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123576 Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` >From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 26 Dec 2024 11:32:33 +0800 Subject: [PATCH 1/3] Relax call36/tail36. Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- lld/ELF/Arch/LoongArch.cpp | 41 ++ 1 file changed, 41 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9d..0aa0cf5b657a0f 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } >From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 14:37:40 +0800 Subject: [PATCH 2/3] modify test for call36/tail36. 
--- lld/test/ELF/loongarch-relax-call36-2.s | 63 + lld/test/ELF/loongarch-relax-call36.s| 135 +++ lld/test/ELF/loongarch-relax-emit-relocs-2.s | 61 + 3 files changed, 259 insertions(+) create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s create mode 100644 lld/test/ELF/loongarch-relax-call36.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 00..1c216a9bdc35ed --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. +# RUN: ld.lld -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion. (PR #123730)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123730 >From 187759562d861034a79cd8c4ee4ab063bba5f4ff Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:03:47 +0800 Subject: [PATCH 1/2] Support relaxation during TLSDESC GD/LD to IE/LE conversion. Complement https://. When relaxation enable, remove redundant NOPs. --- lld/ELF/Arch/LoongArch.cpp | 32 +--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 37614c3e9615d6..5f49b23e8ffb1a 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -965,10 +965,16 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_GOT_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: case R_LARCH_TLS_LD_PC_HI20: -case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && - r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) + if (isPairRelaxable(relocs, i)) +relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); + break; +case R_LARCH_TLS_DESC_PC_HI20: + if (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE) { +if (relaxable(relocs, i)) + remove = 4; + } else if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -986,6 +992,17 @@ static bool relax(Ctx &ctx, InputSection &sec) { isUInt<12>(r.sym->getVA(ctx, r.addend))) remove = 4; break; +case R_LARCH_TLS_DESC_PC_LO12: + if (relaxable(relocs, i) && + (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE)) +remove = 4; + break; +case R_LARCH_TLS_DESC_LD: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_GD_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) +remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -1214,6 +1231,10 @@ void 
LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { +isRelax = relaxable(relocs, i); +if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || +rel.type == R_LARCH_TLS_DESC_PC_LO12)) + continue; tlsdescToIe(loc, rel, val); } continue; @@ -1230,6 +1251,11 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { +isRelax = relaxable(relocs, i); +if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || +rel.type == R_LARCH_TLS_DESC_PC_LO12 || +(rel.type == R_LARCH_TLS_DESC_LD && isUInt<12>(val + continue; tlsdescToLe(loc, rel, val); } continue; >From e024b7c5a85100627aa64a47dfc46221e709f400 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:30:55 +0800 Subject: [PATCH 2/2] Modify loongarch-relax-tlsdesc.s. --- lld/test/ELF/loongarch-relax-tlsdesc.s | 45 +- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s index 5e538985d1402c..666ca6bd1e7243 100644 --- a/lld/test/ELF/loongarch-relax-tlsdesc.s +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -9,7 +9,6 @@ # RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s # RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 -## FIXME: IE/LE relaxation have not yet been implemented, --relax/--no-relax obtain the same results. ## Transition from TLSDESC to IE/LE. Also check --emit-relocs. 
# RUN: ld.lld --relax -e 0 -z now --emit-relocs a.64.o c.64.o -o a.64.le # RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s @@ -73,25 +72,21 @@ # LE64-RELA: could not find section '.got' ## a@tprel = 0x8 -# LE64:20158: nop +# LE64:20158: ori $a0, $zero, 8 # LE64-NEXT:R_LARCH_TLS_DESC_PC_HI20 a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT:R_LARCH_TLS_DESC_PC_LO12 a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT:R_LARCH_TLS_DESC_LD a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: ori $a0, $zero, 8 # LE64-NEXT:R_LARCH_TLS_DESC_CALL a # LE64-NEXT:R_LARCH_RELAX *ABS* # LE64-NEXT: add.d $a1, $a0, $tp ## b@tprel = 0x7ff -# LE64:2016c: nop +# LE64:20160:
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization. (PR #123743)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123743 In LoongArch, this optimization is only supported when relaxation is enabled. From: * pcalau12i $a0, %got_pc_hi20(sym_got) * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) To: * pcalau12i $a0, %pc_hi20(sym) * addi.w/d $a0, $a0, %pc_lo12(sym) If the original code sequence can be relaxed into a single instruction `pcaddi`, this patch will not be taken (see https://github.com/llvm/llvm-project/pull/123566). The implementation related to `got` is split into two locations because the `relax()` function is part of an iteration fixed-point algorithm. We should minimize it to achieve better linker performance. FIXME: Although the optimization has been performed, the GOT entries still exist, similarly to AArch64. Eliminating the entries may require additional marking in the common code. >From 6e05f360c33f6a1999032d104d020afa5191c21c Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 14 Jan 2025 15:50:49 +0800 Subject: [PATCH 1/2] [lld][LoongArch] GOT indirection to PC relative optimization. In LoongArch, this optimization is only supported when relaxation is enabled. From: * pcalau12i $a0, %got_pc_hi20(sym_got) * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) To: * pcalau12i $a0, %pc_hi20(sym) * addi.w/d $a0, $a0, %pc_lo12(sym) If the original code sequence can be relaxed into a single instruction `pcaddi`, this patch will not be taken (see https://github.com/llvm/llvm-project/pull/123566). The implementation related to `got` is split into two locations because the `relax()` function is part of an iteration fixed-point algorithm. We should minimize it to achieve better linker performance. FIXME: Although the optimization has been performed, the GOT entries still exist, similarly to AArch64. Eliminating the entries may require additional marking in the common code. 
--- lld/ELF/Arch/LoongArch.cpp | 66 + lld/test/ELF/loongarch-relax-pc-hi20-lo12.s | 10 ++-- 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 5f49b23e8ffb1a..bd98831fba5257 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -47,6 +47,8 @@ class LoongArch final : public TargetInfo { void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const; }; } // end anonymous namespace @@ -1150,6 +1152,54 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, } } +// Try GOT indirection to PC relative optimization when relaxation is enabled. +// From: +// * pcalau12i $a0, %got_pc_hi20(sym_got) +// * ld.w/d$a0, $a0, %got_pc_lo12(sym_got) +// To: +// * pcalau12i $a0, %pc_hi20(sym) +// * addi.w/d $a0, $a0, %pc_lo12(sym) +// +// FIXME: Althouth the optimization has been performed, the GOT entries still +// exists, similarly to AArch64. Eliminating the entries may be require +// additional marking in the common code. +bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const { + if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible || + rHi20.sym->isGnuIFunc() || + (ctx.arg.isPic && !cast(*rHi20.sym).section)) +return false; + + Symbol &sym = *rHi20.sym; + uint64_t symLocal = sym.getVA(ctx) + rHi20.addend; + // Check if the address difference is within +/-2GB range. + // For simplicity, the range mentioned here is an approximate estimate and is + // not fully equivalent to the entire region that PC-relative addressing can + // cover. 
+ int64_t pageOffset = + getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset); + if (!isInt<20>(pageOffset >> 12)) +return false; + + Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset, + rHi20.addend, &sym}; + Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend, + &sym}; + + const uint32_t currInsn = read32le(loc); + const uint32_t nextInsn = read32le(loc + 4); + uint64_t pageDelta = + getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type); + // pcalau12i $a0, %pc_hi20 + write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0)); + relocate(loc, newRHi20, pageDelta); + // addi.w/d $a0, $a0, %pc_lo12 + write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn), + getJ5(nextInsn), 0)); + relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64)); + return true; +} + // During TLSDESC GD_TO_IE, the converted code sequence always includes an // instruction related to the Lo12 relocation (ld.[wd]). To obtain correct
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE. (PR #123715)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123715 >From dff3031fdb2ca3755b73e3b81e56f8008a409470 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 14:29:17 +0800 Subject: [PATCH 1/5] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while use existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) -- * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the preceding instructions to NOPs, due to both forms of code sequence (corresponding to relocation combinations: R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) have same process. FIXME: When relaxation enables, redundant NOPs can be removed. It will be implemented in a future patch. Note: All forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model, which compilers do not generate and lld is unsupported. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. 
``` --- lld/ELF/Arch/LoongArch.cpp | 146 - lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp| 38 ++ lld/ELF/Relocations.h | 1 + 4 files changed, 169 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ef25e741901d93..37614c3e9615d6 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,11 +39,14 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; private: void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -61,6 +64,7 @@ enum Op { LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, + PCALAU12I = 0x1a00, LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, @@ -72,6 +76,7 @@ enum Reg { R_ZERO = 0, R_RA = 1, R_TP = 2, + R_A0 = 4, R_T0 = 12, R_T1 = 13, R_T2 = 14, @@ -962,7 +967,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (isPairRelaxable(relocs, i)) + if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && + r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -1047,6 +1053,103 @@ void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, } } +// Convert TLSDESC GD/LD to IE. 
+// In normal or medium code model, there are two forms of code sequences: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// -- +// * pcaddi $a0, %desc_pcrel_20(a) +// * load $ra, $a0, %desc_ld(a) +// * jirl $ra, $ra, %desc_call(a) +// +// The code sequence obtained is as follows: +// * pcalau12i $a0, %ie_pc_hi20(sym_ie) +// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) +// +// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the +// preceding instructions to NOPs, due to both forms of code sequence +// (corresponding to relocation combinations: +// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and +// R_LARCH_TLS_DESC_PCREL20_S2) have same process. +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel, +uint64_t val) const { + switch (rel.type) { +
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion. (PR #123730)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123730 Complement https://github.com/llvm/llvm-project/pull/123715. When relaxation enable, remove redundant NOPs. >From ddb64ee49845b302df8ea50546164cceb87cf288 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:03:47 +0800 Subject: [PATCH 1/2] Support relaxation during TLSDESC GD/LD to IE/LE conversion. Complement https://. When relaxation enable, remove redundant NOPs. --- lld/ELF/Arch/LoongArch.cpp | 32 +--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 37614c3e9615d6..5f49b23e8ffb1a 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -965,10 +965,16 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_GOT_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: case R_LARCH_TLS_LD_PC_HI20: -case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && - r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) + if (isPairRelaxable(relocs, i)) +relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); + break; +case R_LARCH_TLS_DESC_PC_HI20: + if (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE) { +if (relaxable(relocs, i)) + remove = 4; + } else if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -986,6 +992,17 @@ static bool relax(Ctx &ctx, InputSection &sec) { isUInt<12>(r.sym->getVA(ctx, r.addend))) remove = 4; break; +case R_LARCH_TLS_DESC_PC_LO12: + if (relaxable(relocs, i) && + (r.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC || + r.expr == R_RELAX_TLS_GD_TO_LE)) +remove = 4; + break; +case R_LARCH_TLS_DESC_LD: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_GD_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) +remove = 4; + break; } // For all 
anchors whose offsets are <= r.offset, they are preceded by @@ -1214,6 +1231,10 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { +isRelax = relaxable(relocs, i); +if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || +rel.type == R_LARCH_TLS_DESC_PC_LO12)) + continue; tlsdescToIe(loc, rel, val); } continue; @@ -1230,6 +1251,11 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { bits); relocateNoSym(loc, rel.type, val); } else { +isRelax = relaxable(relocs, i); +if (isRelax && (rel.type == R_LARCH_TLS_DESC_PC_HI20 || +rel.type == R_LARCH_TLS_DESC_PC_LO12 || +(rel.type == R_LARCH_TLS_DESC_LD && isUInt<12>(val + continue; tlsdescToLe(loc, rel, val); } continue; >From ad372aff2088a03136424af2c845194a79ae7221 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 4 Jan 2025 15:30:55 +0800 Subject: [PATCH 2/2] Modify loongarch-relax-tlsdesc.s. --- lld/test/ELF/loongarch-relax-tlsdesc.s | 45 +- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s index 5e538985d1402c..666ca6bd1e7243 100644 --- a/lld/test/ELF/loongarch-relax-tlsdesc.s +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -9,7 +9,6 @@ # RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s # RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 -## FIXME: IE/LE relaxation have not yet been implemented, --relax/--no-relax obtain the same results. ## Transition from TLSDESC to IE/LE. Also check --emit-relocs. 
# RUN: ld.lld --relax -e 0 -z now --emit-relocs a.64.o c.64.o -o a.64.le # RUN: llvm-readobj -r -x .got a.64.le 2>&1 | FileCheck --check-prefix=LE64-RELA %s @@ -73,25 +72,21 @@ # LE64-RELA: could not find section '.got' ## a@tprel = 0x8 -# LE64:20158: nop +# LE64:20158: ori $a0, $zero, 8 # LE64-NEXT:R_LARCH_TLS_DESC_PC_HI20 a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT:R_LARCH_TLS_DESC_PC_LO12 a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: nop # LE64-NEXT:R_LARCH_TLS_DESC_LD a # LE64-NEXT:R_LARCH_RELAX *ABS* -# LE64-NEXT: ori $a0, $zero, 8 # LE64-NEXT:R_LARCH_TLS_DESC_CALL a # LE64-NEXT:R_LARCH_RELAX *ABS* # LE64
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE. (PR #123715)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123715 >From dff3031fdb2ca3755b73e3b81e56f8008a409470 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 14:29:17 +0800 Subject: [PATCH 1/7] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while use existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) -- * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the preceding instructions to NOPs, due to both forms of code sequence (corresponding to relocation combinations: R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) have same process. FIXME: When relaxation enables, redundant NOPs can be removed. It will be implemented in a future patch. Note: All forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model, which compilers do not generate and lld is unsupported. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. 
``` --- lld/ELF/Arch/LoongArch.cpp | 146 - lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp| 38 ++ lld/ELF/Relocations.h | 1 + 4 files changed, 169 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ef25e741901d93..37614c3e9615d6 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,11 +39,14 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; private: void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -61,6 +64,7 @@ enum Op { LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, + PCALAU12I = 0x1a00, LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, @@ -72,6 +76,7 @@ enum Reg { R_ZERO = 0, R_RA = 1, R_TP = 2, + R_A0 = 4, R_T0 = 12, R_T1 = 13, R_T2 = 14, @@ -962,7 +967,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. - if (isPairRelaxable(relocs, i)) + if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && + r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; case R_LARCH_CALL36: @@ -1047,6 +1053,103 @@ void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, } } +// Convert TLSDESC GD/LD to IE. 
+// In normal or medium code model, there are two forms of code sequences: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// -- +// * pcaddi $a0, %desc_pcrel_20(a) +// * load $ra, $a0, %desc_ld(a) +// * jirl $ra, $ra, %desc_call(a) +// +// The code sequence obtained is as follows: +// * pcalau12i $a0, %ie_pc_hi20(sym_ie) +// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) +// +// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the +// preceding instructions to NOPs, due to both forms of code sequence +// (corresponding to relocation combinations: +// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and +// R_LARCH_TLS_DESC_PCREL20_S2) have same process. +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel, +uint64_t val) const { + switch (rel.type) { +
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion (PR #123702)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123702 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support TLSDESC GD/LD to IE/LE (PR #123715)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123715 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion (PR #123730)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123730 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization. (PR #123743)
@@ -1150,6 +1152,58 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, } } +// Try GOT indirection to PC relative optimization when relaxation is enabled. +// From: +// * pcalau12i $a0, %got_pc_hi20(sym_got) +// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) +// To: +// * pcalau12i $a0, %pc_hi20(sym) +// * addi.w/d $a0, $a0, %pc_lo12(sym) +// +// FIXME: Although the optimization has been performed, the GOT entries still +// exist, similarly to AArch64. Eliminating the entries may require +// additional marking in the common code. +bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const { + if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible || ylzsx wrote: Thanks. I have added this test in a previous patch (https://github.com/llvm/llvm-project/pull/123566). https://github.com/llvm/llvm-project/pull/123743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization. (PR #123743)
@@ -1150,6 +1152,58 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, } } +// Try GOT indirection to PC relative optimization when relaxation is enabled. +// From: +// * pcalau12i $a0, %got_pc_hi20(sym_got) +// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) +// To: +// * pcalau12i $a0, %pc_hi20(sym) +// * addi.w/d $a0, $a0, %pc_lo12(sym) +// +// FIXME: Although the optimization has been performed, the GOT entries still ylzsx wrote: Thanks, I will change it as follows: ``` Note: Although the optimization has been performed, the GOT entries still exist, similarly to AArch64. Eliminating the entries will increase code complexity. ``` https://github.com/llvm/llvm-project/pull/123743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence (PR #123677)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD (PR #123600)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123600 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)
https://github.com/ylzsx edited https://github.com/llvm/llvm-project/pull/123743 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. (PR #121120)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/121120 Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while using the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. Different code models correspond to different TLSDESC code sequences. In the normal or medium code model, there are two forms of code sequence: ``` pcalau12i $a0, %desc_pc_hi20(sym_desc) addi.d $a0, $a0, %desc_pc_lo12(sym_desc) ld.d $ra, $a0, %desc_ld(sym_desc) jirl $ra, $ra, %desc_call(sym_desc) --- pcaddi $a0, %desc_pcrel_20(sym_desc) ld.d $ra, $a0, %desc_ld(sym_desc) jirl $ra, $ra, %desc_call(sym_desc) ``` In the extreme code model, there is only one: ``` pcalau12i $a0, %desc_pc_hi20(sym_desc_large) addi.d $a1, $zero, %desc_pc_lo12(sym_desc_large) lu32i.d $a1, %desc64_pc_lo20(sym_desc_large) lu52i.d $a1, $a1, %desc64_pc_hi12(sym_desc_large) add.d $a0, $a0, $a1 ld.d $ra, $a0, %desc_ld(sym_desc_large) jirl $ra, $ra, %desc_call(sym_desc_large) ``` For simplicity, in the normal or medium code model we convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations `R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12` and `R_LARCH_TLS_DESC_PCREL20_S2`) are processed in the same way. However, for the extreme code model, IE optimization requires a temporary register $a1, which exists in the original preceding code sequence. To avoid additional bookkeeping, in the extreme code model, we convert the last two instructions to NOPs. Fortunately, the extreme code model has only one form (it does not use the `R_LARCH_TLS_DESC_PCREL20_S2` relocation), which makes this conversion strategy feasible. >From 30939363f35f367e00bdde74d273c7db9ef76e0b Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 24 Dec 2024 22:39:15 +0800 Subject: [PATCH 1/2] [lld][LoongArch] Implement TLSDESC GD/LD to IE/LE. 
Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while using the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. Different code models correspond to different TLSDESC code sequences. In the normal or medium code model, there are two forms of code sequence: ``` pcalau12i $a0, %desc_pc_hi20(sym_desc) addi.d $a0, $a0, %desc_pc_lo12(sym_desc) ld.d $ra, $a0, %desc_ld(sym_desc) jirl $ra, $ra, %desc_call(sym_desc) --- pcaddi $a0, %desc_pcrel_20(sym_desc) ld.d $ra, $a0, %desc_ld(sym_desc) jirl $ra, $ra, %desc_call(sym_desc) ``` In the extreme code model, there is only one: ``` pcalau12i $a0, %desc_pc_hi20(sym_desc_large) addi.d $a1, $zero, %desc_pc_lo12(sym_desc_large) lu32i.d $a1, %desc64_pc_lo20(sym_desc_large) lu52i.d $a1, $a1, %desc64_pc_hi12(sym_desc_large) add.d $a0, $a0, $a1 ld.d $ra, $a0, %desc_ld(sym_desc_large) jirl $ra, $ra, %desc_call(sym_desc_large) ``` For simplicity, in the normal or medium code model we convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations `R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12` and `R_LARCH_TLS_DESC_PCREL20_S2`) are processed in the same way. However, for the extreme code model, IE optimization requires a temporary register $a1, which exists in the original preceding code sequence. To avoid additional bookkeeping, in the extreme code model, we convert the last two instructions to NOPs. Fortunately, the extreme code model has only one form (it does not use the `R_LARCH_TLS_DESC_PCREL20_S2` relocation), which makes this conversion strategy feasible. 
--- lld/ELF/Arch/LoongArch.cpp | 268 + lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp| 21 +-- lld/ELF/Relocations.h | 1 + 4 files changed, 276 insertions(+), 15 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 3280c34cb6ed05..6119bda8200a77 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,15 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val, + bool isExtreme) const; + void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val, + bool isExtreme) const; }; } // end anonymous namespace @@ -53,9 +61,15 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD (PR #123600)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123600 >From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 19:29:32 +0800 Subject: [PATCH 1/6] Relax TLS LE/GD/LD. In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- lld/ELF/Arch/LoongArch.cpp | 68 +++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0..2d6d86d2ca63b 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(ctx, r.addend); + // Check if the val exceeds the range of addi/ld/st. + if (!isInt<12>(val)) +return; + uint32_t currInsn = read32le(sec.content().data() + r.offset); + switch (r.type) { + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_ADD_R: +sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; +remove = 4; +break; + case R_LARCH_TLS_LE_LO12_R: +currInsn = +insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0); +sec.relaxAux->writes.push_back(currInsn); +sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R; +break; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -903,6 +943,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { } case R_LARCH_PCALA_HI20: case R_LARCH_GOT_PC_HI20: +case R_LARCH_TLS_GD_PC_HI20: +case R_LARCH_TLS_LD_PC_HI20:
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123576 >From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 26 Dec 2024 11:32:33 +0800 Subject: [PATCH 1/6] Relax call36/tail36. Instructions with relocation `R_LARCH_CALL36` may be relaxed as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- lld/ELF/Arch/LoongArch.cpp | 41 ++ 1 file changed, 41 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9..0aa0cf5b657a0 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } >From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 14:37:40 +0800 Subject: [PATCH 2/6] modify test for call36/tail36. 
--- lld/test/ELF/loongarch-relax-call36-2.s | 63 + lld/test/ELF/loongarch-relax-call36.s| 135 +++ lld/test/ELF/loongarch-relax-emit-relocs-2.s | 61 + 3 files changed, 259 insertions(+) create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s create mode 100644 lld/test/ELF/loongarch-relax-call36.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 0..1c216a9bdc35e --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. +# RUN: ld.lld -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +# RELAX-NEXT: bl -134217728 +## offset = 12 - 0x810 = -0x804(-134217732), hi=512, lo18=-4 +# RELAX-NEXT:810: pcaddu18i $ra, -512 +# RELAX-NEXT: jirl $ra, $ra, -4 +# RELAX-EMPTY: + +# RELAX-MID
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD (PR #123600)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123600 >From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 19:29:32 +0800 Subject: [PATCH 1/7] Relax TLS LE/GD/LD. In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- lld/ELF/Arch/LoongArch.cpp | 68 +++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0..2d6d86d2ca63b 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(ctx, r.addend); + // Check if the val exceeds the range of addi/ld/st. + if (!isInt<12>(val)) +return; + uint32_t currInsn = read32le(sec.content().data() + r.offset); + switch (r.type) { + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_ADD_R: +sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; +remove = 4; +break; + case R_LARCH_TLS_LE_LO12_R: +currInsn = +insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0); +sec.relaxAux->writes.push_back(currInsn); +sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R; +break; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -903,6 +943,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { } case R_LARCH_PCALA_HI20: case R_LARCH_GOT_PC_HI20: +case R_LARCH_TLS_GD_PC_HI20: +case R_LARCH_TLS_LD_PC_HI20:
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD (PR #123600)
@@ -1015,8 +1063,20 @@ void LoongArch::finalizeRelax(int passes) const { r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; case R_LARCH_B26: + case R_LARCH_TLS_LE_LO12_R: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; + case R_LARCH_TLS_GD_PCREL20_S2: +// Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead +// of R_TLSLD_PC because the processing of relocation +// R_LARCH_TLS_LD_PC_HI20 is the same as R_LARCH_TLS_GD_PC_HI20. If ylzsx wrote: Thanks for your review. I will revise it to the following description. Do you think it is clear? ``` Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead of R_TLSLD_PC due to historical reasons. In fact, TLSLD is not fully supported on LoongArch. We need to handle relocation of R_LARCH_TLS_LD_PC_HI20 as equivalent to R_LARCH_TLS_GD_PC_HI20. This reason has also been mentioned in mold commit: https://github.com/rui314/mold/commit/5dfa1cf07c03bd57cb3d493b652ef22441bcd71c ``` https://github.com/llvm/llvm-project/pull/123600 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD (PR #123600)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123600 >From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 19:29:32 +0800 Subject: [PATCH 1/8] Relax TLS LE/GD/LD. In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- lld/ELF/Arch/LoongArch.cpp | 68 +++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0..2d6d86d2ca63b 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(ctx, r.addend); + // Check if the val exceeds the range of addi/ld/st. + if (!isInt<12>(val)) +return; + uint32_t currInsn = read32le(sec.content().data() + r.offset); + switch (r.type) { + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_ADD_R: +sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; +remove = 4; +break; + case R_LARCH_TLS_LE_LO12_R: +currInsn = +insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0); +sec.relaxAux->writes.push_back(currInsn); +sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R; +break; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -903,6 +943,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { } case R_LARCH_PCALA_HI20: case R_LARCH_GOT_PC_HI20: +case R_LARCH_TLS_GD_PC_HI20: +case R_LARCH_TLS_LD_PC_HI20:
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
@@ -1002,6 +1008,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. +// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) ylzsx wrote: Yeah, I have fixed it. https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123680 >From 0f580567169ffbf1546a5389ab4b9f7d1fc07c71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/8] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations can appear interleaved, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 4edc625b05cb0..f9a22a7bd5218 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1002,6 +1008,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + // Wh
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123680 >From 0f580567169ffbf1546a5389ab4b9f7d1fc07c71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/6] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations can appear interleaved, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 4edc625b05cb0..f9a22a7bd5218 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1002,6 +1008,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + // Wh
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
@@ -1375,14 +1375,20 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + // LoongArch support IE to LE optimization in non-extreme code model. + bool execOptimizeInLoongArch = + ctx.arg.emachine == EM_LOONGARCH && + (type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12); ylzsx wrote: At this point, I cannot determine whether the current code model is extreme or medium, since I cannot obtain the next relocation record. So, as a workaround, I have chosen to convert the relocation type in both code models and then either restore it (extreme code model) or relax it (medium code model) later in relocateAlloc. https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123680 >From 0f580567169ffbf1546a5389ab4b9f7d1fc07c71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/5] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations can appear interleaved, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 4edc625b05cb0..f9a22a7bd5218 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1002,6 +1008,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + // Wh
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123576 >From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 26 Dec 2024 11:32:33 +0800 Subject: [PATCH 1/5] Relax call36/tail36. Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- lld/ELF/Arch/LoongArch.cpp | 41 ++ 1 file changed, 41 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9..0aa0cf5b657a0 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } >From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 14:37:40 +0800 Subject: [PATCH 2/5] modify test for call36/tail36. 
--- lld/test/ELF/loongarch-relax-call36-2.s | 63 + lld/test/ELF/loongarch-relax-call36.s| 135 +++ lld/test/ELF/loongarch-relax-emit-relocs-2.s | 61 + 3 files changed, 259 insertions(+) create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s create mode 100644 lld/test/ELF/loongarch-relax-call36.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 0..1c216a9bdc35e --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. +# RUN: ld.lld -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +# RELAX-NEXT: bl -134217728 +## offset = 12 - 0x810 = -0x804(-134217732), hi=512, lo18=-4 +# RELAX-NEXT:810: pcaddu18i $ra, -512 +# RELAX-NEXT: jirl $ra, $ra, -4 +# RELAX-EMPTY: + +# RELAX-MID
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123576 >From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 26 Dec 2024 11:32:33 +0800 Subject: [PATCH 1/5] Relax call36/tail36. Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- lld/ELF/Arch/LoongArch.cpp | 41 ++ 1 file changed, 41 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9..0aa0cf5b657a0 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } >From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 14:37:40 +0800 Subject: [PATCH 2/5] modify test for call36/tail36. 
--- lld/test/ELF/loongarch-relax-call36-2.s | 63 + lld/test/ELF/loongarch-relax-call36.s| 135 +++ lld/test/ELF/loongarch-relax-emit-relocs-2.s | 61 + 3 files changed, 259 insertions(+) create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s create mode 100644 lld/test/ELF/loongarch-relax-call36.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 0..1c216a9bdc35e --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. +# RUN: ld.lld -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +# RELAX-NEXT: bl -134217728 +## offset = 12 - 0x810 = -0x804(-134217732), hi=512, lo18=-4 +# RELAX-NEXT:810: pcaddu18i $ra, -512 +# RELAX-NEXT: jirl $ra, $ra, -4 +# RELAX-EMPTY: + +# RELAX-MID
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (PR #123680)
ylzsx wrote: @MaskRay @xen0n Could you help me review the code? https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during TLSDESC GD/LD to IE/LE conversion (PR #123730)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123730 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] GOT indirection to PC relative optimization (PR #123743)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123743 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits