[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence. (PR #123677)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123677 Relax TLSDESC code sequence. Original code sequence: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) Cannot convert to LE/IE, but relax: * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future patch. >From 56c24f9746ef42c449a4d1d5caf10f7cd1dd7d81 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 31 Dec 2024 15:51:43 +0800 Subject: [PATCH 1/6] Relax TLSDESC code sequence. Original code sequence: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) Cannot convert to LE/IE, but relax: * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future patch. 
--- lld/ELF/Arch/LoongArch.cpp | 44 +- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 2d6d86d2ca63b2..a6db15bbf1efd4 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -782,7 +782,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, (rHi20.type == R_LARCH_TLS_GD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12) || (rHi20.type == R_LARCH_TLS_LD_PC_HI20 && - rLo12.type == R_LARCH_GOT_PC_LO12))) + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 && + rLo12.type == R_LARCH_TLS_DESC_PC_LO12))) return; // GOT references to absolute symbols can't be relaxed to use pcaddi in @@ -804,6 +806,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, symBase = rHi20.sym->getVA(ctx); else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); + else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC) +symBase = ctx.in.got->getTlsDescAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -837,6 +841,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2; else sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); @@ -903,6 +909,33 @@ static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax TLSDESC code sequence. In LoongArch, the conversion of TLSDESC GD/LD to +// LE/IE is closely tied to relaxation, similar to how GCC handles it. 
(Due to +// the lack of an efficient way for handling conversions in the extreme code +// model and the difficulty in determining whether the extreme code model is +// being used in handleTlsRelocation, this approach may seem like a workaround). +// Consequently, the resulting code sequence depends on whether the conversion +// to LE/IE is performed. +// +// Original code sequence: +// * pcalau12i $a0, %desc_pc_hi20(sym_desc) +// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// +// Cannot convert to LE/IE, but relax: +// * pcaddi $a0, %desc_pcrel_20(sym_desc) +// * ld.d $ra, $a0, %desc_ld(sym_desc) +// * jirl $ra, $ra, %desc_call(sym_desc) +// +// FIXME: Implement TLSDESC GD/LD to LE/IE. +static void relaxTlsdesc(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &rHi20, Relocation &rLo12, + uint32_t &remove) { + if (ctx.arg.shared && rHi20.type == R_LARCH_TLS_DESC_PC_HI20) +return relaxPCHi20Lo12(ctx, sec, i, loc, rHi20, rLo12, remove); +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -959,6 +992,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (relaxable(relocs, i)) relaxTlsLe(ctx, sec, i, loc, r, remove); break; +case R_LARCH_TLS_DESC_PC_HI20: + if (isPairRelaxable(relocs, i)) +relaxTlsdesc(ctx, sec, i, loc, r, relocs[i + 2], remove); + break; } // For all anchors whose of
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence. (PR #123677)
llvmbot wrote: @llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Zhaoxin Yang (ylzsx) Changes Relax TLSDESC code sequence. Original code sequence: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) Cannot convert to LE/IE, but relax: * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future patch. --- Full diff: https://github.com/llvm/llvm-project/pull/123677.diff 2 Files Affected: - (modified) lld/ELF/Arch/LoongArch.cpp (+17-2) - (added) lld/test/ELF/loongarch-relax-tlsdesc.s (+280) ``diff diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 2d6d86d2ca63b2..ec09437404eddc 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -762,9 +762,12 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: // pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +//| %desc_pc_hi20(sym) // addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +//| %desc_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +// pcaddi$a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +//| %desc_pcrel_20(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -782,7 +785,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, (rHi20.type == R_LARCH_TLS_GD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12) || (rHi20.type == R_LARCH_TLS_LD_PC_HI20 && - rLo12.type == R_LARCH_GOT_PC_LO12))) + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 && + rLo12.type == R_LARCH_TLS_DESC_PC_LO12))) return; // GOT references to absolute symbols can't be relaxed to use pcaddi in @@ -804,6 +809,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, symBase = rHi20.sym->getVA(ctx); else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); + else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC) +symBase = ctx.in.got->getTlsDescAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -837,6 +844,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2; else sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); @@ -945,6 +954,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_GOT_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: case 
R_LARCH_TLS_LD_PC_HI20: +case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); @@ -1078,6 +1088,11 @@ void LoongArch::finalizeRelax(int passes) const { write32le(p, aux.writes[writesIdx++]); r.expr = R_TLSGD_PC; break; + case R_LARCH_TLS_DESC_PCREL20_S2: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +r.expr = R_TLSDESC_PC; +break; default: llvm_unreachable("unsupported type"); } diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s new file mode 100644 index 00..f9d984ad6387a3 --- /dev/null +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -0,0 +1,280 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.64.o +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.64.o +# RUN: ld.lld --relax -shared -soname=c.64.so c.64.o -o c.64.so + +## Test the TLSDESC relaxation. +# RUN: ld.lld --relax -shared -z now a.64.o c.64.o -o a.64.so +# RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 + +## FIXME: The transition frome TLSDESC to IE/LE has not yet been implemented. +## Keep the dynamic relocations and hand them over to dynamic linker. + +# RUN: ld.lld --relax -e 0 -z now a
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion. (PR #123702)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123702 Complement https://github.com/llvm/llvm-project/pull/123680. When relaxation is enabled, remove redundant NOPs. >From 83d8b7e00b16c97b11f3c19ed45bb70eeae95428 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 3 Jan 2025 11:15:56 +0800 Subject: [PATCH 1/3] Support relaxation during IE to LE conversion. Complement https://github.com/llvm/llvm-project/pull/123680. When relaxation is enabled, remove redundant NOPs. --- lld/ELF/Arch/LoongArch.cpp | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index dc98dbec872c0c..ef25e741901d93 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -975,6 +975,11 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (relaxable(relocs, i)) relaxTlsLe(ctx, sec, i, loc, r, remove); break; +case R_LARCH_TLS_IE_PC_HI20: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) +remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -1049,7 +1054,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { secAddr += s->outSecOff; else if (auto *ehIn = dyn_cast(&sec)) secAddr += ehIn->getParent()->outSecOff; - bool isExtreme = false; + bool isExtreme = false, isRelax = false; const MutableArrayRef relocs = sec.relocs(); for (size_t i = 0, size = relocs.size(); i != size; ++i) { Relocation &rel = relocs[i]; @@ -1077,8 +1082,12 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); relocateNoSym(loc, rel.type, val); - } else + } else { +isRelax = relaxable(relocs, i); +if (isRelax && rel.type == R_LARCH_TLS_IE_PC_HI20 && isUInt<12>(val)) + continue; tlsIeToLe(loc, rel, val); + } continue; default: break; >From 6b79432dcb9172906deab445055d8197fdeed425 Mon Sep 17 00:00:00 2001 From: yangzhaoxin 
Date: Fri, 3 Jan 2025 11:20:10 +0800 Subject: [PATCH 2/3] Modify loongarch-relax-tls-ie.s when relaxation is enabled. --- lld/test/ELF/loongarch-relax-tls-ie.s | 28 --- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s index f5375ae3a3b400..be76263f2978e0 100644 --- a/lld/test/ELF/loongarch-relax-tls-ie.s +++ b/lld/test/ELF/loongarch-relax-tls-ie.s @@ -3,11 +3,10 @@ # RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o -## FIXME: IE relaxation has not yet been implemented. -## --relax/--no-relax has the same result. Also check --emit-relocs. +## Also check --emit-relocs. # RUN: ld.lld --emit-relocs %t.o -o %t # RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s -# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s +# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER %s # RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax # RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s @@ -42,6 +41,29 @@ # LE-NEXT:add.d $a2, $a2, $tp # LE-NEXT:add.d $a3, $a3, $tp +# LER: 20158: ori $a0, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:add.d $a0, $a0, $tp +# LER-NEXT: 20160: lu12i.w $a1, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT:ori $a1, $a1, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a1, $a1, $tp +# LER-NEXT: 2016c: lu12i.w $a3, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:ori $a2, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT:ori $a3, $a3, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a2, $a2, $tp +# LER-NEXT:add.d $a3, $a3, $tp + la.tls.ie $a0, a# relax add.d $a0, $a0, $tp >From 
9d99de87229b84f65f392dc5eb73bf1aef16fa65 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 17 Jan 2025 09:00:44 +0800 Subject: [PATCH 3/3] Modify test. Add --relax option. --- lld/test/ELF/loongarch-relax-tls-ie.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s index be76263f2978e0..2c95a342251f20 100644 --- a/lld/test/ELF/loongarch-relax-tls-ie.s +++ b/lld/test/ELF/loongarch-relax-tls-ie.s
[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion. (PR #123702)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Zhaoxin Yang (ylzsx) Changes Complement https://github.com/llvm/llvm-project/pull/123680. When relaxation enable, remove redundant NOPs. --- Full diff: https://github.com/llvm/llvm-project/pull/123702.diff 2 Files Affected: - (modified) lld/ELF/Arch/LoongArch.cpp (+11-2) - (modified) lld/test/ELF/loongarch-relax-tls-ie.s (+26-4) ``diff diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index dc98dbec872c0c..ef25e741901d93 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -975,6 +975,11 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (relaxable(relocs, i)) relaxTlsLe(ctx, sec, i, loc, r, remove); break; +case R_LARCH_TLS_IE_PC_HI20: + if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE && + isUInt<12>(r.sym->getVA(ctx, r.addend))) +remove = 4; + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -1049,7 +1054,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { secAddr += s->outSecOff; else if (auto *ehIn = dyn_cast(&sec)) secAddr += ehIn->getParent()->outSecOff; - bool isExtreme = false; + bool isExtreme = false, isRelax = false; const MutableArrayRef relocs = sec.relocs(); for (size_t i = 0, size = relocs.size(); i != size; ++i) { Relocation &rel = relocs[i]; @@ -1077,8 +1082,12 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); relocateNoSym(loc, rel.type, val); - } else + } else { +isRelax = relaxable(relocs, i); +if (isRelax && rel.type == R_LARCH_TLS_IE_PC_HI20 && isUInt<12>(val)) + continue; tlsIeToLe(loc, rel, val); + } continue; default: break; diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s b/lld/test/ELF/loongarch-relax-tls-ie.s index f5375ae3a3b400..2c95a342251f20 100644 --- a/lld/test/ELF/loongarch-relax-tls-ie.s +++ b/lld/test/ELF/loongarch-relax-tls-ie.s @@ -3,11 +3,10 @@ # RUN: 
llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o -## FIXME: IE relaxation has not yet been implemented. -## --relax/--no-relax has the same result. Also check --emit-relocs. -# RUN: ld.lld --emit-relocs %t.o -o %t +## Also check --emit-relocs. +# RUN: ld.lld --emit-relocs --relax %t.o -o %t # RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s -# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s +# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER %s # RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax # RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s @@ -42,6 +41,29 @@ # LE-NEXT:add.d $a2, $a2, $tp # LE-NEXT:add.d $a3, $a3, $tp +# LER: 20158: ori $a0, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:add.d $a0, $a0, $tp +# LER-NEXT: 20160: lu12i.w $a1, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT:ori $a1, $a1, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a1, $a1, $tp +# LER-NEXT: 2016c: lu12i.w $a3, 1 +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b +# LER-NEXT: R_LARCH_RELAX *ABS* +# LER-NEXT:ori $a2, $zero, 4095 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a +# LER-NEXT:ori $a3, $a3, 0 +# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b +# LER-NEXT:add.d $a2, $a2, $tp +# LER-NEXT:add.d $a3, $a3, $tp + la.tls.ie $a0, a# relax add.d $a0, $a0, $tp `` https://github.com/llvm/llvm-project/pull/123702 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx updated https://github.com/llvm/llvm-project/pull/123680 >From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/5] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations can appear interleaved, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ec09437404eddc..dc98dbec872c0c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + //
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx converted_to_draft https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx ready_for_review https://github.com/llvm/llvm-project/pull/123680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)
github-actions[bot] wrote: :warning: undef deprecator found issues in your code. :warning: You can test this locally with the following command: ``bash git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 2d8035aff3d44bd59f4ff3af60f87c7d6e6219ea c5caf560857f3c4f71416940a528df5ce75212bc llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll `` The following files introduce new uses of undef: - llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp [Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead. In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead. For example, this is considered a bad practice: ```llvm define void @fn() { ... br i1 undef, ... 
} ``` Please use the following instead: ```llvm define void @fn(i1 %cond) { ... br i1 %cond, ... } ``` Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information. https://github.com/llvm/llvm-project/pull/123684 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123680 Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0, $a0, %ie_pc_lo12(sym) FIXME: When relaxation is enabled, the redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, the original code sequence with relocations can appear interleaved, because the converted code sequence calculates the absolute offset. However, in the extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. >From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 2 Jan 2025 20:58:56 +0800 Subject: [PATCH 1/4] Convert TLS IE to LE in the normal or medium code model. Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0, $a0, %ie_pc_lo12(sym) FIXME: When relaxation is enabled, the redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, the original code sequence with relocations can appear interleaved, because the converted code sequence calculates the absolute offset. However, in the extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. 
--- lld/ELF/Arch/LoongArch.cpp | 87 ++ lld/ELF/Relocations.cpp| 15 ++- 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ec09437404eddc..dc98dbec872c0c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. +// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. 
+void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. In this case, the relocs are as follows: +// +/
[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Zhaoxin Yang (ylzsx) Changes Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %ie_pc_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0 $a0, %ie_pc_lo12(sym) FIXME: When relaxation enables, redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, original code sequence with relocations allow interleaving, because converted code sequence calculates the absolute offset. However, in extreme code model, to identify the current code model, the first four instructions with relocations must appear consecutively. --- Full diff: https://github.com/llvm/llvm-project/pull/123680.diff 4 Files Affected: - (modified) lld/ELF/Arch/LoongArch.cpp (+87) - (modified) lld/ELF/Relocations.cpp (+15-1) - (added) lld/test/ELF/loongarch-relax-tls-ie.s (+70) - (modified) lld/test/ELF/loongarch-tls-ie.s (+12-18) ``diff diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ec09437404eddc..dc98dbec872c0c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo { void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; bool relaxOnce(int pass) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; void finalizeRelax(int passes) const override; + +private: + void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; }; } // end anonymous namespace @@ -53,6 +57,8 @@ enum Op { ADDI_W = 0x0280, ADDI_D = 0x02c0, ANDI = 0x0340, + ORI = 0x0380, + LU12I_W = 0x1400, PCADDI = 0x1800, PCADDU12I = 0x1c00, LD_W = 0x2880, @@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) { return changed; } +// Convert TLS IE to LE in the normal or medium code model. 
+// Original code sequence: +// * pcalau12i $a0, %ie_pc_hi20(sym) +// * ld.d $a0, $a0, %ie_pc_lo12(sym) +// +// The code sequence converted is as follows: +// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP +// * ori $a0 $a0, %le_lo12(sym) +// +// When relaxation enables, redundant NOPs can be removed. +void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<32>(val) && + "val exceeds the range of medium code model in tlsIeToLe"); + + bool isUInt12 = isUInt<12>(val); + const uint32_t currInsn = read32le(loc); + switch (rel.type) { + case R_LARCH_TLS_IE_PC_HI20: +if (isUInt12) + write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop +else + write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12), + 0)); // lu12i.w $a0, %le_hi20 +break; + case R_LARCH_TLS_IE_PC_LO12: +if (isUInt12) + write32le(loc, insn(ORI, getD5(currInsn), R_ZERO, + val)); // ori $a0, $r0, %le_lo12 +else + write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn), + lo12(val))); // ori $a0, $a0, %le_lo12 +break; + } +} + +void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + const unsigned bits = ctx.arg.is64 ? 64 : 32; + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast(&sec)) +secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast(&sec)) +secAddr += ehIn->getParent()->outSecOff; + bool isExtreme = false; + const MutableArrayRef relocs = sec.relocs(); + for (size_t i = 0, size = relocs.size(); i != size; ++i) { +Relocation &rel = relocs[i]; +uint8_t *loc = buf + rel.offset; +uint64_t val = SignExtend64( +sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits); + +switch (rel.expr) { +case R_RELAX_HINT: + continue; +case R_RELAX_TLS_IE_TO_LE: + if (rel.type == R_LARCH_TLS_IE_PC_HI20) { +// LoongArch does not support IE to LE optimize in the extreme code +// model. 
In this case, the relocs are as follows: +// +// * i -- R_LARCH_TLS_IE_PC_HI20 +// * i+1 -- R_LARCH_TLS_IE_PC_LO12 +// * i+2 -- R_LARCH_TLS_IE64_PC_LO20 +// * i+3 -- R_LARCH_TLS_IE64_PC_HI12 +isExtreme = +(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20); + } + if (isExtreme) { +rel.expr = getRelExpr(rel.type, *rel.sym, loc); +val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), + bits); +relocateNoSym(loc, rel.type, val); + } else +tlsIeToLe(loc, rel, val); + continue; +default: + break; +} +relocate(loc, rel, val); + } +} + // When relaxing just R_LARCH_ALIGN, relocDeltas is usually
[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes For VALU shuffles, this saves an instruction in some case. --- Patch is 285.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123684.diff 19 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+114) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (+1) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+7) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll (+21-28) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll (+17-23) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll (+34-50) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll (+112-160) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll (+21-28) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll (+17-23) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll (+34-50) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll (+112-160) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll (+21-28) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll (+17-23) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll (+34-50) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll (+112-160) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll (+500-287) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll (+500-287) - (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+48-48) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll (+1-2) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6d5c3b5e0742b3..8d03fde8911242 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -489,6 +489,90 @@ void 
AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); } +void AMDGPUDAGToDAGISel::SelectVectorShuffle(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // TODO: Handle 16-bit element vectors with even aligned masks. + if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) || + VT.getVectorNumElements() != 2) { +SelectCode(N); +return; + } + + auto *SVN = cast(N); + + SDValue Src0 = SVN->getOperand(0); + SDValue Src1 = SVN->getOperand(1); + ArrayRef Mask = SVN->getMask(); + SDLoc DL(N); + + assert(Src0.getValueType().getVectorNumElements() == 2 && Mask.size() == 2 && + Mask[0] < 4 && Mask[1] < 4); + + SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1; + SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1; + unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0; + unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0; + + if (Mask[0] < 0) { +Src0SubReg = Src1SubReg; +MachineSDNode *ImpDef = +CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); +VSrc0 = SDValue(ImpDef, 0); + } + + if (Mask[1] < 0) { +Src1SubReg = Src0SubReg; +MachineSDNode *ImpDef = +CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); +VSrc1 = SDValue(ImpDef, 0); + } + + // SGPR case needs to lower to copies. + // + // Also use subregister extract when we can directly blend the registers with + // a simple subregister copy. + // + // TODO: Maybe we should fold this out earlier + if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 && + Src1SubReg == AMDGPU::sub0) { +// The low element of the result always comes from src0. +// The high element of the result always comes from src1. +// op_sel selects the high half of src0. +// op_sel_hi selects the high half of src1. + +unsigned Src0OpSel = +Src0SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE; +unsigned Src1OpSel = +Src1SubReg == AMDGPU::sub1 ? 
SISrcMods::OP_SEL_0 : SISrcMods::NONE; + +SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32); +SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32); +SDValue ZeroMods = CurDAG->getTargetConstant(0, DL, MVT::i32); + +CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(), + {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1, + ZeroMods, // clamp + ZeroMods, // op_sel + ZeroMods, // op_sel_hi + ZeroMods, // neg_lo + ZeroMods}); // neg_hi +return; + } + + SDValue ResultElt0 = + CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0); + SDValue ResultElt1 = + CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1); + + const SDValu
[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-downstack-mergeability-warning). > Learn more: https://graphite.dev/docs/merge-pull-requests * **#123684** (https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-icon) 👈 View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-view-in-graphite * **#123596** (https://app.graphite.dev/github/pr/llvm/llvm-project/123596?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/123684 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/123684 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang] Optionally do not compile the runtime in-tree (PR #122336)
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/122336 >From dd3ac2e6d8d8d57cd639c25bea3b8d5c99a2f81e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 9 Jan 2025 15:58:48 +0100 Subject: [PATCH 1/9] Introduce FLANG_INCLUDE_RUNTIME --- flang/CMakeLists.txt| 7 +++- flang/test/CMakeLists.txt | 6 +++- flang/test/Driver/ctofortran.f90| 1 + flang/test/Driver/exec.f90 | 1 + flang/test/Runtime/no-cpp-dep.c | 2 +- flang/test/lit.cfg.py | 5 ++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/f18/CMakeLists.txt | 4 +-- flang/unittests/CMakeLists.txt | 6 ++-- flang/unittests/Evaluate/CMakeLists.txt | 46 ++--- 10 files changed, 50 insertions(+), 29 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 68947eaa9c9bd7..69e963a43d0b97 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -233,6 +233,9 @@ else() include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) endif() +option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" ON) +pythonize_bool(FLANG_INCLUDE_RUNTIME) + set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) @@ -473,7 +476,9 @@ if (FLANG_CUF_RUNTIME) find_package(CUDAToolkit REQUIRED) endif() -add_subdirectory(runtime) +if (FLANG_INCLUDE_RUNTIME) + add_subdirectory(runtime) +endif () if (LLVM_INCLUDE_EXAMPLES) add_subdirectory(examples) diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index cab214c2ef4c8c..e398e0786147aa 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -71,9 +71,13 @@ set(FLANG_TEST_DEPENDS llvm-objdump llvm-readobj split-file - FortranRuntime FortranDecimal ) + +if (FLANG_INCLUDE_RUNTIME) + list(APPEND FLANG_TEST_DEPENDS FortranRuntime) +endif () + if (LLVM_ENABLE_PLUGINS AND NOT WIN32) list(APPEND FLANG_TEST_DEPENDS Bye) endif() diff --git 
a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90 index 78eac32133b18e..10c7adaccc9588 100644 --- a/flang/test/Driver/ctofortran.f90 +++ b/flang/test/Driver/ctofortran.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! RUN: split-file %s %t ! RUN: chmod +x %t/runtest.sh ! RUN: %t/runtest.sh %t %t/ffile.f90 %t/cfile.c %flang | FileCheck %s diff --git a/flang/test/Driver/exec.f90 b/flang/test/Driver/exec.f90 index fd174005ddf62a..9ca91ee24011c9 100644 --- a/flang/test/Driver/exec.f90 +++ b/flang/test/Driver/exec.f90 @@ -1,4 +1,5 @@ ! UNSUPPORTED: system-windows +! REQUIRES: flang-rt ! Verify that flang can correctly build executables. ! RUN: %flang %s -o %t diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c index b1a5fa004014cc..7303ce63fdec41 100644 --- a/flang/test/Runtime/no-cpp-dep.c +++ b/flang/test/Runtime/no-cpp-dep.c @@ -3,7 +3,7 @@ This test makes sure that flang's runtime does not depend on the C++ runtime library. It tries to link this simple file against libFortranRuntime.a with a C compiler. -REQUIRES: c-compiler +REQUIRES: c-compiler, flang-rt RUN: %if system-aix %{ export OBJECT_MODE=64 %} RUN: %cc -std=c99 %s -I%include %libruntime -lm \ diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index c452b6d231c89f..78378bf5f413e8 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -163,10 +163,13 @@ ToolSubst("%not_todo_abort_cmd", command=FindTool("not"), unresolved="fatal") ) +if config.flang_include_runtime: + config.available_features.add("flang-rt") + # Define some variables to help us test that the flang runtime doesn't depend on # the C++ runtime libraries. For this we need a C compiler. If for some reason # we don't have one, we can just disable the test. 
-if config.cc: +if config.flang_include_runtime and config.cc: libruntime = os.path.join(config.flang_lib_dir, "libFortranRuntime.a") include = os.path.join(config.flang_src_dir, "include") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index d1a0ac763cf8a0..19f9330f93ae14 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -32,6 +32,7 @@ else: config.openmp_module_dir = None config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@" config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@" +config.flang_include_runtime = @FLANG_INCLUDE_RUNTIME@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 4362fcf0537616..022c346aabdbde 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -72,7 +72,7 @@ if (NOT CMAKE_CROSSCOMPILING) set(depends ${FLANG_
[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)
@@ -1,47 +1,34 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_library(FortranEvaluateTesting Meinersbur wrote: @jeanPerier I added the change to #122336: 8732b2771bd422b939da120c5425b82798ee95f2 Thanks. https://github.com/llvm/llvm-project/pull/122334 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)
@@ -8,6 +8,12 @@ macro(enable_cuda_compilation name files) if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") +if (NOT FLANG_RT_ENABLE_STATIC) + message(FATAL_ERROR +"FLANG_RT_ENABLE_STATIC is required for CUDA build of Flang-RT" +) +endif() + enable_language(CUDA) set_target_properties(${name} Meinersbur wrote: Should be fixed in 0450959120e5ebae63b1e26b8232e7193c403099. Noticed it myself when testing all the build configurations. Unfortunately, ALIAS targets do not solve everything; they cannot be used to modify a target. https://github.com/llvm/llvm-project/pull/121782 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] ddb9cc5 - Revert "Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero in…"
Author: Kiran Chandramohan Date: 2025-01-20T12:43:24Z New Revision: ddb9cc530ec0667c3b4207d13bc6ec26f64241d4 URL: https://github.com/llvm/llvm-project/commit/ddb9cc530ec0667c3b4207d13bc6ec26f64241d4 DIFF: https://github.com/llvm/llvm-project/commit/ddb9cc530ec0667c3b4207d13bc6ec26f64241d4.diff LOG: Revert "Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero in…" This reverts commit 8a229f595a5c0ff354cdfa05cda974a9d56674df. Added: flang/test/Driver/fno-zero-init.f90 flang/test/Lower/zero_init.f90 flang/test/Lower/zero_init_default_init.f90 Modified: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Flang.cpp flang/include/flang/Lower/LoweringOptions.def flang/lib/Frontend/CompilerInvocation.cpp flang/lib/Lower/ConvertVariable.cpp flang/tools/bbc/bbc.cpp Removed: diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d38dd2b4e3cf09..c4b9743597bb2e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3505,6 +3505,11 @@ def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, Visibility<[ClangOption, FlangOption]>; +defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero", + PosFlag, + NegFlag>; def fno_pointer_tbaa : Flag<["-"], "fno-pointer-tbaa">, Group; def fno_temp_file : Flag<["-"], "fno-temp-file">, Group, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, HelpText< diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 86ed25badfa2b7..9c1fd28a3a8a26 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -155,8 +155,10 @@ void Flang::addCodegenOptions(const ArgList &Args, options::OPT_flang_deprecated_no_hlfir, options::OPT_fno_ppc_native_vec_elem_order, 
options::OPT_fppc_native_vec_elem_order, - options::OPT_ftime_report, options::OPT_ftime_report_EQ, - options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); + options::OPT_finit_global_zero, + options::OPT_fno_init_global_zero, options::OPT_ftime_report, + options::OPT_ftime_report_EQ, options::OPT_funroll_loops, + options::OPT_fno_unroll_loops}); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 5a6debfdffe030..396c91948be36b 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -44,5 +44,8 @@ ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0) /// If false, assume that the shapes/types/allocation-status match. ENUM_LOWERINGOPT(ReallocateLHS, unsigned, 1, 1) +/// If true, initialize globals without initialization to zero. +/// On by default. +ENUM_LOWERINGOPT(InitGlobalZero, unsigned, 1, 1) #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 15b1e1e0a24881..3c6da4687f65d3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1377,6 +1377,14 @@ bool CompilerInvocation::createFromArgs( invoc.loweringOpts.setNoPPCNativeVecElemOrder(true); } + // -f[no-]init-global-zero + if (args.hasFlag(clang::driver::options::OPT_finit_global_zero, + clang::driver::options::OPT_fno_init_global_zero, + /*default=*/true)) +invoc.loweringOpts.setInitGlobalZero(true); + else +invoc.loweringOpts.setInitGlobalZero(false); + // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or // -Rpass-analysis. This will be used later when processing and outputting the // remarks generated by LLVM in ExecuteCompilerInvocation.cpp. 
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 9ee42d5cd88002..87236dc293ebbc 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -635,7 +635,11 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, global.setLinkName(builder.createCommonLinkage()); Fortran::lower::createGlobalInitialization( builder, global, [&](fir::FirOpBuilder &builder) { - mlir::Value initValue = builder.create(loc, symTy); + mlir::Value initValue; + if (converter.getLoweringOptions().getInitGlobalZero()) +initValue = builder.create(loc, symTy); + else +
[llvm-branch-commits] [clang] [Driver] Change linker job in Baremetal toolchain object accomodate GCCInstallation.(2/3) (PR #121830)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121830 >From 8fa748d05c4f3464427df5cc117c196a5006e5a9 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 6 Jan 2025 09:21:11 -0800 Subject: [PATCH] [RISCV] Change linker job in Baremetal toolchain object to accomodate valid GCCInstallation.(2/3) This patch adds the defaults for the CXXStdlib type and other runtime libs. Additionally, this patch also modifies the linker job and extends it to support a valid GCCInstallation. This PR preserves the behavior of both toolchain objects in the linker job, with the only new change being that the presence of `--gcc-toolchain` or `--gcc-install-dir` will imply that the GNU linker is the default linker unless a different linker is passed through the `-fuse-ld` flag. This is the second PR in the series of 3 PRs for merging and extending the Baremetal toolchain object. The division of the PRs is as follows: - Teach Baremetal toolchain about GCC installation and make sysroot and assembler related changes. - Changes related to linker job and defaults for CXXStdlib and other runtime libs. - Finally removing the call to RISCVToolchain object. 
RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I8fdb3490a3888001b1bb999e7ee8df90a187d18d --- clang/lib/Driver/ToolChains/BareMetal.cpp | 110 +++- clang/lib/Driver/ToolChains/BareMetal.h | 20 ++-- clang/test/Driver/aarch64-toolchain-extra.c | 6 ++ clang/test/Driver/aarch64-toolchain.c | 73 + clang/test/Driver/arm-toolchain-extra.c | 7 +- clang/test/Driver/arm-toolchain.c | 77 +- clang/test/Driver/baremetal-multilib.yaml | 2 +- clang/test/Driver/baremetal-sysroot.cpp | 2 +- clang/test/Driver/baremetal.cpp | 77 +++--- clang/test/Driver/sanitizer-ld.c| 2 +- 10 files changed, 298 insertions(+), 78 deletions(-) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 7b0f2bc2fd3895..38de25e20ea8df 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -172,6 +172,8 @@ BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, : Generic_ELF(D, Triple, Args) { GCCInstallation.init(Triple, Args); SysRoot = computeSysRoot(); + UseLD = + Args.getLastArgValue(options::OPT_fuse_ld_EQ).equals_insensitive("ld"); if (GCCInstallation.isValid()) { Multilibs = GCCInstallation.getMultilibs(); SelectedMultilibs.assign({GCCInstallation.getMultilib()}); @@ -342,6 +344,32 @@ BareMetal::OrderedMultilibs BareMetal::getOrderedMultilibs() const { return llvm::reverse(Default); } +ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::CST_Libstdcxx; + return ToolChain::CST_Libcxx; +} + +ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const { + if (getTriple().isRISCV() && GCCInstallation.isValid()) +return ToolChain::RLT_Libgcc; + return ToolChain::RLT_CompilerRT; +} + +ToolChain::UnwindLibType +BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const { + if (getTriple().isRISCV()) +return ToolChain::UNW_None; + + return 
ToolChain::GetUnwindLibType(Args); +} + +const char *BareMetal::getDefaultLinker() const { + if (isUsingLD()) +return "ld"; + return "ld.lld"; +} + void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc)) @@ -535,12 +563,21 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, const llvm::Triple::ArchType Arch = TC.getArch(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + if (!D.SysRoot.empty()) +CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); CmdArgs.push_back("-Bstatic"); - if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax)) -CmdArgs.push_back("--no-relax"); + if (Triple.isRISCV()) { +if (Args.hasArg(options::OPT_mno_relax)) + CmdArgs.push_back("--no-relax"); +if (TC.isUsingLD()) { + CmdArgs.push_back("-m"); + CmdArgs.push_back(Arch == llvm::Triple::riscv64 ? "elf64lriscv" + : "elf32lriscv"); +} +CmdArgs.push_back("-X"); + } if (Triple.isARM() || Triple.isThumb()) { bool IsBigEndian = arm::isARMBigEndian(Triple, Args); @@ -551,19 +588,54 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_r)) { -CmdArgs.pu
[llvm-branch-commits] [clang] [Driver][RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object.(3/3) (PR #121831)
https://github.com/quic-garvgupt updated https://github.com/llvm/llvm-project/pull/121831 >From 87abf1b761051e3f2cd22d64ac8104bb6a5412ab Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Mon, 6 Jan 2025 10:05:08 -0800 Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object.(3/3) This PR is the last in the series merging the RISCVToolchain object into the BareMetal toolchain object. This PR makes changes to call the BareMetal toolchain object for riscv targets as well and removes the RISCVToolChain object. The division of the PRs is as follows: - Teach Baremetal toolchain about GCC installation and make sysroot and assembler related changes. - Changes related to linker job and defaults for CXXStdlib and other runtime libs. - Finally removing the call to RISCVToolchain object. RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 Change-Id: I2877ac328f55a7638cc185d6034866cbd2ac4203 --- clang/lib/Driver/CMakeLists.txt | 1 - clang/lib/Driver/Driver.cpp | 7 +- clang/test/Driver/riscv-args.c | 2 +- clang/test/Driver/riscv32-toolchain-extra.c | 7 +++--- clang/test/Driver/riscv32-toolchain.c | 26 ++--- clang/test/Driver/riscv64-toolchain-extra.c | 7 +++--- clang/test/Driver/riscv64-toolchain.c | 20 7 files changed, 31 insertions(+), 39 deletions(-) diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 5bdb6614389cff..eee29af5d181a1 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -74,7 +74,6 @@ add_clang_library(clangDriver ToolChains/OHOS.cpp ToolChains/OpenBSD.cpp ToolChains/PS4CPU.cpp - ToolChains/RISCVToolchain.cpp ToolChains/Solaris.cpp ToolChains/SPIRV.cpp ToolChains/SPIRVOpenMP.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 36d6c93c43321f..4dda9a34b08d99 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -41,7 +41,6 @@ #include "ToolChains/PPCFreeBSD.h" #include 
"ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" -#include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/SPIRVOpenMP.h" #include "ToolChains/SYCL.h" @@ -6665,11 +6664,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: -if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args)) - TC = - std::make_unique(*this, Target, Args); -else - TC = std::make_unique(*this, Target, Args); +TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c index cab08e5b0f811e..fc35407baf2cc6 100644 --- a/clang/test/Driver/riscv-args.c +++ b/clang/test/Driver/riscv-args.c @@ -3,4 +3,4 @@ // Make sure -T is the last with gcc-toolchain option // RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 -T a.lds -u foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-LD %s -// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds" +// CHECK-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10" diff --git a/clang/test/Driver/riscv32-toolchain-extra.c b/clang/test/Driver/riscv32-toolchain-extra.c index cbb3c23ebb3421..420f7b52036090 100644 --- a/clang/test/Driver/riscv32-toolchain-extra.c +++ b/clang/test/Driver/riscv32-toolchain-extra.c @@ -18,12 +18,12 @@ // RUN: ln -s %S/Inputs/basic_riscv32_nogcc_tree/riscv32-unknown-elf %t/riscv32-nogcc/riscv32-unknown-elf // RUN: %t/riscv32-nogcc/bin/clang %s -### -no-canonical-prefixes \ // RUN:--gcc-toolchain=%t/riscv32-nogcc/invalid \ -// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld= 2>&1 \ +// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld=ld 2>&1 \ // RUN:| FileCheck -check-prefix=C-RV32-BAREMETAL-ILP32-NOGCC %s // RUN: %t/riscv32-nogcc/bin/clang %s -### -no-canonical-prefixes \ // 
RUN:--sysroot=%t/riscv32-nogcc/bin/../riscv32-unknown-elf \ -// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld= 2>&1 \ +// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld=ld 2>&1 \ // RUN:| FileCheck -check-prefix=C-RV32-BAREMETAL-ILP32-NOGCC %s // C-RV32-BAREMETAL-ILP32-NOGCC: "-internal-isystem" "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/include" @@ -31,6 +31,5 @@ // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib/crt0.o" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtbegin.o" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib" -// C-RV32-BAREMETAL-ILP32-NOGCC: "--start-group" "-lc" "
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD. (PR #123600)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123600 In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. >From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 19:29:32 +0800 Subject: [PATCH 1/5] Relax TLS LE/GD/LD. 
In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- lld/ELF/Arch/LoongArch.cpp | 68 +++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val
[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD. (PR #123600)
llvmbot wrote: @llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Zhaoxin Yang (ylzsx) Changes In local-exec form, the code sequence is converted as follows: ``` From: lu12i.w $rd, %le_hi20_r(sym) R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX add.w/d $rd, $rd, $tp, %le_add_r(sym) R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX To: addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) R_LARCH_TLS_LE_LO12_R ``` In global-dynamic or local-dynamic, the code sequence is converted as follows: ``` From: pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym) R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_GOT_PC_LO12, R_LARCH_RELAX To: pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym) R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 ``` Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch. --- Patch is 24.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123600.diff 6 Files Affected: - (modified) lld/ELF/Arch/LoongArch.cpp (+64-4) - (modified) lld/test/ELF/loongarch-relax-emit-relocs.s (+107-5) - (added) lld/test/ELF/loongarch-relax-tls-le.s (+115) - (modified) lld/test/ELF/loongarch-tls-gd.s (+41-2) - (modified) lld/test/ELF/loongarch-tls-ld.s (+38-2) - (modified) lld/test/ELF/loongarch-tls-le.s (+16) ``diff diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. 
// From: -// pcalau12i $a0, %pc_hi20(sym) -// addi.w/d $a0, $a0, %pc_lo12(sym) +// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, if (!((rHi20.type == R_LARCH_PCALA_HI20 && rLo12.type == R_LARCH_PCALA_LO12) || (rHi20.type == R_LARCH_GOT_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_GD_PC_HI20 && + rLo12.type == R_LARCH_GOT_PC_LO12) || +(rHi20.type == R_LARCH_TLS_LD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12))) return; @@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, else if (rHi20.expr == RE_LOONGARCH_PAGE_PC || rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC) symBase = rHi20.sym->getVA(ctx); + else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) +symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, return; sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; - sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; + if (rHi20.type == R_LARCH_TLS_GD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) +sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else +sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0)); remove = 4; } @@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, } } +// Relax code sequence. 
+// From: +// lu12i.w $rd, %le_hi20_r(sym) +// add.w/d $rd, $rd, $tp, %le_add_r(sym) +// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym) +// To: +// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym) +static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i, + uint64_t loc, Relocation &r, uint32_t &remove) { + uint64_t val = r.sym->getVA(ctx, r.addend); + // Check if the val exceeds the range of addi/ld/st. + if (!isInt<12>(val)) +return; + uint32_t currInsn = read32le(sec.content().data() + r.offset); + switch (r.type) { + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_ADD_R: +sec.relaxAux->relocTypes[i] = R_LARCH_RELAX; +remove = 4; +break; + case R_LARCH_TLS_LE_LO12_R: +currInsn = +insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0); +sec.relaxAux->writes.push_back(currInsn); +sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R; +break; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.get
[llvm-branch-commits] [clang] [Driver] Change linker job in Baremetal toolchain object accomodate GCCInstallation.(2/3) (PR #121830)
@@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { Tool *buildStaticLibTool() const override; public: + virtual bool isUsingLD() const { return UseLD || GCCInstallation.isValid(); } quic-garvgupt wrote: There was a request in the original PR that there is some downstream toolchain that inherits from Baremetal toolchain and defaults to LLD. Therefore I made this virtual so that it can be overridden; however, in the recent patchset I have removed it since there is no need for it in community and can easily be maintained downstream. https://github.com/llvm/llvm-project/pull/121830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [Flang] Introduce FortranSupport (PR #122069)
Meinersbur wrote: `FortranSupport` has just been introduced in #122894. https://github.com/llvm/llvm-project/pull/122069 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
llvmbot wrote: @llvm/pr-subscribers-lld Author: Zhaoxin Yang (ylzsx) Changes Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- Full diff: https://github.com/llvm/llvm-project/pull/123576.diff 4 Files Affected: - (modified) lld/ELF/Arch/LoongArch.cpp (+41) - (added) lld/test/ELF/loongarch-relax-call36-2.s (+63) - (added) lld/test/ELF/loongarch-relax-call36.s (+135) - (added) lld/test/ELF/loongarch-relax-emit-relocs-2.s (+61) ``diff diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9d..0aa0cf5b657a0f 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 00..71650aefe94321 --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld --relax -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. 
+# RUN: ld.lld --relax -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld --relax -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +# RELAX-NEXT: bl -134217728 +## offset = 12 - 0x810 = -0x804(-134217732), hi=512, lo18=-4 +# RELAX-NEXT:810: pcaddu18i $ra, -512 +# RELAX-NEXT: jirl $ra, $ra, -4 +# RELAX-EMPTY: + +# RELAX-MID-LABEL: <.mid>: +## offset = 0x801 - 0x8008000 = 32768 +# RELAX-MID-NEXT:8008000: bl 32768 +# RELAX-MID-NEXT: b 32764 +# RELAX-MID-EMPTY: + +# NORELAX-MID-LABEL: <.mid>: +# NORELAX-MID-NEXT: 8008000: pcaddu18i $ra, 0 +# NORELAX-MID-NEXT:jirl $ra, $ra, 0 +# NORELAX-MID-NEXT:pcaddu18i $t0, 0 +# NORELAX-MID-NEXT:jr $t0 +# NORELAX-MID-EMPTY: + +#--- a.s +.global _start, ifunc +_start: + call36 pos # exceed positive range (.text+0x7fc),
[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/123576 Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` >From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Thu, 26 Dec 2024 11:32:33 +0800 Subject: [PATCH 1/3] Relax call36/tail36. Instructions with relocation `R_LARCH_CALL36` may be relax as follows: ``` From: pcaddu18i $dest, %call36(foo) R_LARCH_CALL36, R_LARCH_RELAX jirl $r, $dest, 0 To: b/bl foo # bl if r=$ra, b if r=$zero R_LARCH_B26 ``` --- lld/ELF/Arch/LoongArch.cpp | 41 ++ 1 file changed, 41 insertions(+) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index b999e7fd27ae9d..0aa0cf5b657a0f 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -58,6 +58,8 @@ enum Op { LD_W = 0x2880, LD_D = 0x28c0, JIRL = 0x4c00, + B = 0x5000, + BL = 0x5400, }; enum Reg { @@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, remove = 4; } +// Relax code sequence. +// From: +// pcaddu18i $ra, %call36(foo) +// jirl $ra, $ra, 0 +// To: +// b/bl foo +static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, +uint64_t loc, Relocation &r, uint32_t &remove) { + const uint64_t symLocal = + (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + + r.addend; + + const int64_t distance = symLocal - loc; + // Check if the distance aligns 4 bytes or exceeds the range of b[l]. 
+ if ((distance & 0x3) != 0 || !isInt<28>(distance)) +return; + + const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); + if (getD5(nextInsn) == R_RA) { +// convert jirl to bl +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); +remove = 4; + } else if (getD5(nextInsn) == R_ZERO) { +// convert jirl to b +sec.relaxAux->relocTypes[i] = R_LARCH_B26; +sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); +remove = 4; + } +} + static bool relax(Ctx &ctx, InputSection &sec) { const uint64_t secAddr = sec.getVA(); const MutableArrayRef relocs = sec.relocs(); @@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); break; +case R_LARCH_CALL36: + if (relaxable(relocs, i)) +relaxCall36(ctx, sec, i, loc, r, remove); + break; } // For all anchors whose offsets are <= r.offset, they are preceded by @@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const { // RelExpr is needed for relocating. r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; break; + case R_LARCH_B26: +skip = 4; +write32le(p, aux.writes[writesIdx++]); +break; default: llvm_unreachable("unsupported type"); } >From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 27 Dec 2024 14:37:40 +0800 Subject: [PATCH 2/3] modify test for call36/tail36. 
--- lld/test/ELF/loongarch-relax-call36-2.s | 63 + lld/test/ELF/loongarch-relax-call36.s| 135 +++ lld/test/ELF/loongarch-relax-emit-relocs-2.s | 61 + 3 files changed, 259 insertions(+) create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s create mode 100644 lld/test/ELF/loongarch-relax-call36.s create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s diff --git a/lld/test/ELF/loongarch-relax-call36-2.s b/lld/test/ELF/loongarch-relax-call36-2.s new file mode 100644 index 00..1c216a9bdc35ed --- /dev/null +++ b/lld/test/ELF/loongarch-relax-call36-2.s @@ -0,0 +1,63 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o + +# RUN: ld.lld -T lds a.o -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +## Unsure whether this needs a diagnostic. GNU ld allows this. +# RUN: ld.lld -T lds -pie a.o -o a.pie +# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s --check-prefixes=RELAX,RELAX-MID + +# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt +# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefixes=RELAX,NORELAX-MID + +# RELAX-LABEL: <_start>: +## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0 +# RELAX-NEXT:800: pcaddu18i $ra, 512 +# RELAX-NEXT: jirl $ra, $ra, 0 +# RELAX-NEXT: bl 134217720 +
[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Parse WHEN, OTHERWISE, MATCH clauses plus METADIRECTIVE (PR #121817)
https://github.com/kiranchandramohan approved this pull request. LG. https://github.com/llvm/llvm-project/pull/121817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
paschalis-mpeis wrote: Force-pushed to add the missing code. Also, this PR is now stacked on top of #123635. Thanks for the comments @maks. I am not sure if your [concern](https://github.com/llvm/llvm-project/issues/116817#issuecomment-2602866672) on the issue still stands or not. https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)
https://github.com/Meinersbur ready_for_review https://github.com/llvm/llvm-project/pull/122341 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][NFC] Move runtime library files to flang-rt. (PR #110298)
https://github.com/Meinersbur ready_for_review https://github.com/llvm/llvm-project/pull/110298 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)
https://github.com/jhuber6 approved this pull request. Straightforward renaming and more consistent with the clang runtimes. https://github.com/llvm/llvm-project/pull/122341 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][NFC] Move runtime library files to flang-rt. (PR #110298)
Meinersbur wrote: > There are some missing files in flang-rt/CUDA both headers and cpp files. Git tries to be helpful and moves new files to the new location where the sibling file went, but marks both locations of the same file as a conflict. It is not always obvious which one is the "right" one. https://github.com/llvm/llvm-project/pull/110298 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] ea3aa97 - Avoid module name clashes by choosing unique names
Author: Kiran Chandramohan Date: 2025-01-20T16:23:54Z New Revision: ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8 URL: https://github.com/llvm/llvm-project/commit/ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8 DIFF: https://github.com/llvm/llvm-project/commit/ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8.diff LOG: Avoid module name clashes by choosing unique names Added: Modified: flang/test/Lower/zero_init.f90 flang/test/Lower/zero_init_default_init.f90 Removed: diff --git a/flang/test/Lower/zero_init.f90 b/flang/test/Lower/zero_init.f90 index 5ed6f2247de3b2..16b11158bfce27 100644 --- a/flang/test/Lower/zero_init.f90 +++ b/flang/test/Lower/zero_init.f90 @@ -5,16 +5,16 @@ ! RUN: bbc -finit-global-zero -emit-hlfir -o - %s | FileCheck --check-prefix=CHECK-DEFAULT %s ! RUN: bbc -finit-global-zero=false -emit-hlfir -o - %s | FileCheck --check-prefix=CHECK-NO-ZERO-INIT %s -module m1 +module zeroInitM1 real :: x -end module m1 +end module zeroInitM1 -!CHECK-DEFAULT: fir.global @_QMm1Ex : f32 { +!CHECK-DEFAULT: fir.global @_QMzeroinitm1Ex : f32 { !CHECK-DEFAULT: %[[UNDEF:.*]] = fir.zero_bits f32 !CHECK-DEFAULT: fir.has_value %[[UNDEF]] : f32 !CHECK-DEFAULT: } -!CHECK-NO-ZERO-INIT: fir.global @_QMm1Ex : f32 { +!CHECK-NO-ZERO-INIT: fir.global @_QMzeroinitm1Ex : f32 { !CHECK-NO-ZERO-INIT: %[[UNDEF:.*]] = fir.undefined f32 !CHECK-NO-ZERO-INIT: fir.has_value %[[UNDEF]] : f32 !CHECK-NO-ZERO-INIT: } diff --git a/flang/test/Lower/zero_init_default_init.f90 b/flang/test/Lower/zero_init_default_init.f90 index e2d1f545e35a57..8ca3b33b8ef5c1 100644 --- a/flang/test/Lower/zero_init_default_init.f90 +++ b/flang/test/Lower/zero_init_default_init.f90 @@ -7,16 +7,16 @@ ! 
Test that the flag does not affect globals with default init -module m2 +module zeroInitM2 type val integer :: my_val = 1 end type val type(val) :: v1 -end module m2 +end module zeroInitM2 -!CHECK: fir.global @_QMm2Ev1 : !fir.type<_QMm2Tval{my_val:i32}> { -!CHECK:%[[V1:.*]] = fir.undefined !fir.type<_QMm2Tval{my_val:i32}> +!CHECK: fir.global @_QMzeroinitm2Ev1 : !fir.type<_QMzeroinitm2Tval{my_val:i32}> { +!CHECK:%[[V1:.*]] = fir.undefined !fir.type<_QMzeroinitm2Tval{my_val:i32}> !CHECK:%[[ONE:.*]] = arith.constant 1 : i32 -!CHECK:%[[V1_INIT:.*]] = fir.insert_value %[[V1]], %[[ONE]], ["my_val", !fir.type<_QMm2Tval{my_val:i32}>] : (!fir.type<_QMm2Tval{my_val:i32}>, i32) -> !fir.type<_QMm2Tval{my_val:i32}> -!CHECK:fir.has_value %[[V1_INIT]] : !fir.type<_QMm2Tval{my_val:i32}> +!CHECK:%[[V1_INIT:.*]] = fir.insert_value %[[V1]], %[[ONE]], ["my_val", !fir.type<_QMzeroinitm2Tval{my_val:i32}>] : (!fir.type<_QMzeroinitm2Tval{my_val:i32}>, i32) -> !fir.type<_QMzeroinitm2Tval{my_val:i32}> +!CHECK:fir.has_value %[[V1_INIT]] : !fir.type<_QMzeroinitm2Tval{my_val:i32}> !CHECK: } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Introduce overlapped record type map support (PR #119588)
agozillon wrote: Small ping for a review on this if anyone has a little spare time, it would be greatly appreciated, thank you very much ahead of time :-) https://github.com/llvm/llvm-project/pull/119588 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [mlir] [Flang][OpenMP][MLIR] Initial declare target to for variables implementation (PR #119589)
agozillon wrote: Small ping for a review on this if anyone has a little spare time, it would be greatly appreciated, thank you very much ahead of time :-) https://github.com/llvm/llvm-project/pull/119589 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
https://github.com/paschalis-mpeis ready_for_review https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
llvmbot wrote: @llvm/pr-subscribers-bolt Author: Paschalis Mpeis (paschalis-mpeis) Changes When a pending relocation is created it is also marked whether it is optional or not. It can be optional when such relocation is added as part of an optimization (i.e., `scanExternalRefs`). When bolt tries to `flushPendingRelocations`, it safely skips any optional relocations that cannot be encoded. Background: BOLT, as part of scanExternalRefs, identifies external references from calls and creates some pending relocations for them. Those when flushed will update references to point to the optimized functions. This optimization can be disabled using `--no-scan`. BOLT can assert if any of these pending relocations cannot be encoded. This patch does not disable this optimization but instead selectively applies it given that a pending relocation is optional. --- # Stacked PR on top of: - #123635 --- Full diff: https://github.com/llvm/llvm-project/pull/116964.diff 6 Files Affected: - (modified) bolt/include/bolt/Core/BinarySection.h (+8-5) - (modified) bolt/include/bolt/Core/Relocation.h (+4-1) - (modified) bolt/lib/Core/BinaryFunction.cpp (+1-1) - (modified) bolt/lib/Core/BinarySection.cpp (+13-1) - (modified) bolt/lib/Core/Relocation.cpp (+33) - (modified) bolt/unittests/Core/BinaryContext.cpp (+32) ``diff diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index dedee361882497..37d0932a5c142e 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -66,8 +66,10 @@ class BinarySection { // from the original section address. RelocationSetType DynamicRelocations; - // Pending relocations for this section. - std::vector PendingRelocations; + /// Pending relocations for this section and whether they are optional, i.e., + /// added as part of an optimization. In that case they can be safely omitted + /// if flushPendingRelocations discovers they cannot be encoded. 
+ std::vector> PendingRelocations; struct BinaryPatch { uint64_t Offset; @@ -374,9 +376,10 @@ class BinarySection { DynamicRelocations.emplace(Reloc); } - /// Add relocation against the original contents of this section. - void addPendingRelocation(const Relocation &Rel) { -PendingRelocations.push_back(Rel); + /// Add relocation against the original contents of this section. When added + /// as part of an optimization it is marked as \p Optional. + void addPendingRelocation(const Relocation &Rel, bool Optional = false) { +PendingRelocations.push_back({Rel, Optional}); } /// Add patch to the input contents of this section. diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h index 933f62a31f8fd7..177cc0c70431f4 100644 --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -64,9 +64,12 @@ struct Relocation { /// and \P Type mismatch occurred. static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents); - // Adjust value depending on relocation type (make it PC relative or not) + /// Adjust value depending on relocation type (make it PC relative or not). static uint64_t encodeValue(uint64_t Type, uint64_t Value, uint64_t PC); + /// Return true if there are enough bits to encode the relocation value. + static bool canEncodeValue(uint64_t Type, uint64_t Value, uint64_t PC); + /// Extract current relocated value from binary contents. This is used for /// RISC architectures where values are encoded in specific bits depending /// on the relocation value. For X86, we limit to sign extending the value diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 5da777411ba7a1..5d1e5ca92ca131 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1672,7 +1672,7 @@ bool BinaryFunction::scanExternalRefs() { // Add relocations unless disassembly failed for this function. 
if (!DisassemblyFailed) for (Relocation &Rel : FunctionRelocations) - getOriginSection()->addPendingRelocation(Rel); + getOriginSection()->addPendingRelocation(Rel, /*Optional*/ true); // Inform BinaryContext that this function symbols will not be defined and // relocations should not be created against them. diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp index 9ad49ca1b3a038..a37cc7603df285 100644 --- a/bolt/lib/Core/BinarySection.cpp +++ b/bolt/lib/Core/BinarySection.cpp @@ -165,11 +165,19 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), SectionFileOffset + Patch.Offset); - for (Relocation &Reloc : PendingRelocations) { + uint64_t SkippedPendingRelocations = 0; + for (auto &[Reloc, Optional] : PendingRelocations) { uint64_t Value = Reloc.Addend; if (Reloc.Symbol) Value += Resolver(Reloc.Symbol); +//
[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)
@@ -165,11 +165,17 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS, OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(), SectionFileOffset + Patch.Offset); + uint64_t SkippedPendingRelocations = 0; for (Relocation &Reloc : PendingRelocations) { uint64_t Value = Reloc.Addend; if (Reloc.Symbol) Value += Resolver(Reloc.Symbol); +if (!Relocation::canEncodeValue(Reloc.Type, Value, paschalis-mpeis wrote: `addPendingRelocation` now becomes the only way to add a pending relocation (Parent PR #123635). If it comes from the `scanExternalRefs` optimization, then it is marked as optional. Finally, those relocations can be safely skipped when it's time to flush them. https://github.com/llvm/llvm-project/pull/116964 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits