[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence. (PR #123677)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/123677

Relax TLSDESC code sequence.

Original code sequence:
  * pcalau12i  $a0, %desc_pc_hi20(sym_desc)
  * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
  * ld.d   $ra, $a0, %desc_ld(sym_desc)
  * jirl   $ra, $ra, %desc_call(sym_desc)

Cannot convert to LE/IE, but relax:
  * pcaddi $a0, %desc_pcrel_20(sym_desc)
  * ld.d   $ra, $a0, %desc_ld(sym_desc)
  * jirl   $ra, $ra, %desc_call(sym_desc)

FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future 
patch.

>From 56c24f9746ef42c449a4d1d5caf10f7cd1dd7d81 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Tue, 31 Dec 2024 15:51:43 +0800
Subject: [PATCH 1/6] Relax TLSDESC code sequence.

Original code sequence:
 * pcalau12i  $a0, %desc_pc_hi20(sym_desc)
 * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
 * ld.d   $ra, $a0, %desc_ld(sym_desc)
 * jirl   $ra, $ra, %desc_call(sym_desc)

Cannot convert to LE/IE, but relax:
 * pcaddi $a0, %desc_pcrel_20(sym_desc)
 * ld.d   $ra, $a0, %desc_ld(sym_desc)
 * jirl   $ra, $ra, %desc_call(sym_desc)

FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a
future patch.
---
 lld/ELF/Arch/LoongArch.cpp | 44 +-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 2d6d86d2ca63b2..a6db15bbf1efd4 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -782,7 +782,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 (rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
  rLo12.type == R_LARCH_GOT_PC_LO12) ||
 (rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
- rLo12.type == R_LARCH_GOT_PC_LO12)))
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 &&
+ rLo12.type == R_LARCH_TLS_DESC_PC_LO12)))
 return;
 
   // GOT references to absolute symbols can't be relaxed to use pcaddi in
@@ -804,6 +806,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 symBase = rHi20.sym->getVA(ctx);
   else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
 symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
+  else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC)
+symBase = ctx.in.got->getTlsDescAddr(*rHi20.sym);
   else {
 Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
  << rHi20.expr << ") against symbol " << rHi20.sym
@@ -837,6 +841,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
   else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
+  else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2;
   else
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
   sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
@@ -903,6 +909,33 @@ static void relaxTlsLe(Ctx &ctx, const InputSection &sec, 
size_t i,
   }
 }
 
+// Relax TLSDESC code sequence. In LoongArch, the conversion of TLSDESC GD/LD 
to
+// LE/IE is closely tied to relaxation, similar to how GCC handles it. (Due to
+// the lack of an efficient way for handling conversions in the extreme code
+// model and the difficulty in determining whether the extreme code model is
+// being used in handleTlsRelocation, this approach may seem like a 
workaround).
+// Consequently, the resulting code sequence depends on whether the conversion
+// to LE/IE is performed.
+//
+// Original code sequence:
+//  * pcalau12i  $a0, %desc_pc_hi20(sym_desc)
+//  * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
+//  * ld.d   $ra, $a0, %desc_ld(sym_desc)
+//  * jirl   $ra, $ra, %desc_call(sym_desc)
+//
+// Cannot convert to LE/IE, but relax:
+//  * pcaddi $a0, %desc_pcrel_20(sym_desc)
+//  * ld.d   $ra, $a0, %desc_ld(sym_desc)
+//  * jirl   $ra, $ra, %desc_call(sym_desc)
+//
+// FIXME: Implement TLSDESC GD/LD to LE/IE.
+static void relaxTlsdesc(Ctx &ctx, const InputSection &sec, size_t i,
+ uint64_t loc, Relocation &rHi20, Relocation &rLo12,
+ uint32_t &remove) {
+  if (ctx.arg.shared && rHi20.type == R_LARCH_TLS_DESC_PC_HI20)
+return relaxPCHi20Lo12(ctx, sec, i, loc, rHi20, rLo12, remove);
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   const MutableArrayRef relocs = sec.relocs();
@@ -959,6 +992,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   if (relaxable(relocs, i))
 relaxTlsLe(ctx, sec, i, loc, r, remove);
   break;
+case R_LARCH_TLS_DESC_PC_HI20:
+  if (isPairRelaxable(relocs, i))
+relaxTlsdesc(ctx, sec, i, loc, r, relocs[i + 2], remove);
+  break;
 }
 
 // For all anchors whose of

[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLSDESC code sequence. (PR #123677)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-elf

Author: Zhaoxin Yang (ylzsx)


Changes

Relax TLSDESC code sequence.

Original code sequence:
  * pcalau12i  $a0, %desc_pc_hi20(sym_desc)
  * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
  * ld.d   $ra, $a0, %desc_ld(sym_desc)
  * jirl   $ra, $ra, %desc_call(sym_desc)

Cannot convert to LE/IE, but relax:
  * pcaddi $a0, %desc_pcrel_20(sym_desc)
  * ld.d   $ra, $a0, %desc_ld(sym_desc)
  * jirl   $ra, $ra, %desc_call(sym_desc)

FIXME: The conversion of TLSDESC GD/LD to LE/IE will be implemented in a future 
patch.

---
Full diff: https://github.com/llvm/llvm-project/pull/123677.diff


2 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+17-2) 
- (added) lld/test/ELF/loongarch-relax-tlsdesc.s (+280) 


``diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 2d6d86d2ca63b2..ec09437404eddc 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -762,9 +762,12 @@ static bool isPairRelaxable(ArrayRef relocs, 
size_t i) {
 // Relax code sequence.
 // From:
 //   pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
+//| %desc_pc_hi20(sym)
 //   addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
+//| %desc_pc_lo12(sym)
 // To:
-//   pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
+//   pcaddi$a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
+//| %desc_pcrel_20(sym)
 //
 // From:
 //   pcalau12i $a0, %got_pc_hi20(sym_got)
@@ -782,7 +785,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 (rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
  rLo12.type == R_LARCH_GOT_PC_LO12) ||
 (rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
- rLo12.type == R_LARCH_GOT_PC_LO12)))
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 &&
+ rLo12.type == R_LARCH_TLS_DESC_PC_LO12)))
 return;
 
   // GOT references to absolute symbols can't be relaxed to use pcaddi in
@@ -804,6 +809,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 symBase = rHi20.sym->getVA(ctx);
   else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
 symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
+  else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC)
+symBase = ctx.in.got->getTlsDescAddr(*rHi20.sym);
   else {
 Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
  << rHi20.expr << ") against symbol " << rHi20.sym
@@ -837,6 +844,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
   else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
+  else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2;
   else
 sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
   sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
@@ -945,6 +954,7 @@ static bool relax(Ctx &ctx, InputSection &sec) {
 case R_LARCH_GOT_PC_HI20:
 case R_LARCH_TLS_GD_PC_HI20:
 case R_LARCH_TLS_LD_PC_HI20:
+case R_LARCH_TLS_DESC_PC_HI20:
   // The overflow check for i+2 will be carried out in isPairRelaxable.
   if (isPairRelaxable(relocs, i))
 relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
@@ -1078,6 +1088,11 @@ void LoongArch::finalizeRelax(int passes) const {
 write32le(p, aux.writes[writesIdx++]);
 r.expr = R_TLSGD_PC;
 break;
+  case R_LARCH_TLS_DESC_PCREL20_S2:
+skip = 4;
+write32le(p, aux.writes[writesIdx++]);
+r.expr = R_TLSDESC_PC;
+break;
   default:
 llvm_unreachable("unsupported type");
   }
diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s 
b/lld/test/ELF/loongarch-relax-tlsdesc.s
new file mode 100644
index 00..f9d984ad6387a3
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-tlsdesc.s
@@ -0,0 +1,280 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.64.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.64.o
+# RUN: ld.lld --relax -shared -soname=c.64.so c.64.o -o c.64.so
+
+## Test the TLSDESC relaxation.
+# RUN: ld.lld --relax -shared -z now a.64.o c.64.o -o a.64.so
+# RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s
+# RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s 
--check-prefix=GD64
+
+## FIXME: The transition from TLSDESC to IE/LE has not yet been implemented.
+## Keep the dynamic relocations and hand them over to dynamic linker.
+
+# RUN: ld.lld --relax -e 0 -z now a

[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion. (PR #123702)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/123702

Complements https://github.com/llvm/llvm-project/pull/123680. When relaxation 
is enabled, remove redundant NOPs.

>From 83d8b7e00b16c97b11f3c19ed45bb70eeae95428 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Fri, 3 Jan 2025 11:15:56 +0800
Subject: [PATCH 1/3] Support relaxation during IE to LE conversion.

Complements https://github.com/llvm/llvm-project/pull/123680. When relaxation is enabled, remove redundant NOPs.
---
 lld/ELF/Arch/LoongArch.cpp | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index dc98dbec872c0c..ef25e741901d93 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -975,6 +975,11 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   if (relaxable(relocs, i))
 relaxTlsLe(ctx, sec, i, loc, r, remove);
   break;
+case R_LARCH_TLS_IE_PC_HI20:
+  if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE &&
+  isUInt<12>(r.sym->getVA(ctx, r.addend)))
+remove = 4;
+  break;
 }
 
 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -1049,7 +1054,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, 
uint8_t *buf) const {
 secAddr += s->outSecOff;
   else if (auto *ehIn = dyn_cast(&sec))
 secAddr += ehIn->getParent()->outSecOff;
-  bool isExtreme = false;
+  bool isExtreme = false, isRelax = false;
   const MutableArrayRef relocs = sec.relocs();
   for (size_t i = 0, size = relocs.size(); i != size; ++i) {
 Relocation &rel = relocs[i];
@@ -1077,8 +1082,12 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, 
uint8_t *buf) const {
 val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + 
rel.offset),
bits);
 relocateNoSym(loc, rel.type, val);
-  } else
+  } else {
+isRelax = relaxable(relocs, i);
+if (isRelax && rel.type == R_LARCH_TLS_IE_PC_HI20 && isUInt<12>(val))
+  continue;
 tlsIeToLe(loc, rel, val);
+  }
   continue;
 default:
   break;

>From 6b79432dcb9172906deab445055d8197fdeed425 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Fri, 3 Jan 2025 11:20:10 +0800
Subject: [PATCH 2/3] Modify loongarch-relax-tls-ie.s when relaxation is
 enabled.

---
 lld/test/ELF/loongarch-relax-tls-ie.s | 28 ---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s 
b/lld/test/ELF/loongarch-relax-tls-ie.s
index f5375ae3a3b400..be76263f2978e0 100644
--- a/lld/test/ELF/loongarch-relax-tls-ie.s
+++ b/lld/test/ELF/loongarch-relax-tls-ie.s
@@ -3,11 +3,10 @@
 
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o
 
-## FIXME: IE relaxation has not yet been implemented.
-## --relax/--no-relax has the same result. Also check --emit-relocs.
+## Also check --emit-relocs.
 # RUN: ld.lld --emit-relocs %t.o -o %t
 # RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
-# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE 
%s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER 
%s
 
 # RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
 # RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT 
%s
@@ -42,6 +41,29 @@
 # LE-NEXT:add.d   $a2, $a2, $tp
 # LE-NEXT:add.d   $a3, $a3, $tp
 
+# LER:  20158: ori $a0, $zero, 4095
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:add.d   $a0, $a0, $tp
+# LER-NEXT: 20160: lu12i.w $a1, 1
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 b
+# LER-NEXT:ori $a1, $a1, 0
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 b
+# LER-NEXT:add.d   $a1, $a1, $tp
+# LER-NEXT: 2016c: lu12i.w $a3, 1
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 b
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:ori $a2, $zero, 4095
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 a
+# LER-NEXT:ori $a3, $a3, 0
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 b
+# LER-NEXT:add.d   $a2, $a2, $tp
+# LER-NEXT:add.d   $a3, $a3, $tp
+
 la.tls.ie $a0, a# relax
 add.d $a0, $a0, $tp
 

>From 9d99de87229b84f65f392dc5eb73bf1aef16fa65 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Fri, 17 Jan 2025 09:00:44 +0800
Subject: [PATCH 3/3] Modify test. Add --relax option.

---
 lld/test/ELF/loongarch-relax-tls-ie.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s 
b/lld/test/ELF/loongarch-relax-tls-ie.s
index be76263f2978e0..2c95a342251f20 100644
--- a/lld/test/ELF/loongarch-relax-tls-ie.s
+++ b/lld/test/ELF/loongarch-relax-tls-ie.s

[llvm-branch-commits] [lld] [lld][LoongArch] Support relaxation during IE to LE conversion. (PR #123702)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)


Changes

Complements https://github.com/llvm/llvm-project/pull/123680. When relaxation 
is enabled, remove redundant NOPs.

---
Full diff: https://github.com/llvm/llvm-project/pull/123702.diff


2 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+11-2) 
- (modified) lld/test/ELF/loongarch-relax-tls-ie.s (+26-4) 


``diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index dc98dbec872c0c..ef25e741901d93 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -975,6 +975,11 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   if (relaxable(relocs, i))
 relaxTlsLe(ctx, sec, i, loc, r, remove);
   break;
+case R_LARCH_TLS_IE_PC_HI20:
+  if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE &&
+  isUInt<12>(r.sym->getVA(ctx, r.addend)))
+remove = 4;
+  break;
 }
 
 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -1049,7 +1054,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, 
uint8_t *buf) const {
 secAddr += s->outSecOff;
   else if (auto *ehIn = dyn_cast(&sec))
 secAddr += ehIn->getParent()->outSecOff;
-  bool isExtreme = false;
+  bool isExtreme = false, isRelax = false;
   const MutableArrayRef relocs = sec.relocs();
   for (size_t i = 0, size = relocs.size(); i != size; ++i) {
 Relocation &rel = relocs[i];
@@ -1077,8 +1082,12 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, 
uint8_t *buf) const {
 val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + 
rel.offset),
bits);
 relocateNoSym(loc, rel.type, val);
-  } else
+  } else {
+isRelax = relaxable(relocs, i);
+if (isRelax && rel.type == R_LARCH_TLS_IE_PC_HI20 && isUInt<12>(val))
+  continue;
 tlsIeToLe(loc, rel, val);
+  }
   continue;
 default:
   break;
diff --git a/lld/test/ELF/loongarch-relax-tls-ie.s 
b/lld/test/ELF/loongarch-relax-tls-ie.s
index f5375ae3a3b400..2c95a342251f20 100644
--- a/lld/test/ELF/loongarch-relax-tls-ie.s
+++ b/lld/test/ELF/loongarch-relax-tls-ie.s
@@ -3,11 +3,10 @@
 
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o
 
-## FIXME: IE relaxation has not yet been implemented.
-## --relax/--no-relax has the same result. Also check --emit-relocs.
-# RUN: ld.lld --emit-relocs %t.o -o %t
+## Also check --emit-relocs.
+# RUN: ld.lld --emit-relocs --relax %t.o -o %t
 # RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
-# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE 
%s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER 
%s
 
 # RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
 # RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT 
%s
@@ -42,6 +41,29 @@
 # LE-NEXT:add.d   $a2, $a2, $tp
 # LE-NEXT:add.d   $a3, $a3, $tp
 
+# LER:  20158: ori $a0, $zero, 4095
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:add.d   $a0, $a0, $tp
+# LER-NEXT: 20160: lu12i.w $a1, 1
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 b
+# LER-NEXT:ori $a1, $a1, 0
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 b
+# LER-NEXT:add.d   $a1, $a1, $tp
+# LER-NEXT: 2016c: lu12i.w $a3, 1
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 a
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:  R_LARCH_TLS_IE_PC_HI20 b
+# LER-NEXT:  R_LARCH_RELAX   *ABS*
+# LER-NEXT:ori $a2, $zero, 4095
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 a
+# LER-NEXT:ori $a3, $a3, 0
+# LER-NEXT:  R_LARCH_TLS_IE_PC_LO12 b
+# LER-NEXT:add.d   $a2, $a2, $tp
+# LER-NEXT:add.d   $a3, $a3, $tp
+
 la.tls.ie $a0, a# relax
 add.d $a0, $a0, $tp
 

``




https://github.com/llvm/llvm-project/pull/123702
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx updated 
https://github.com/llvm/llvm-project/pull/123680

>From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Thu, 2 Jan 2025 20:58:56 +0800
Subject: [PATCH 1/5] Convert TLS IE to LE in the normal or medium code model.

Original code sequence:
 * pcalau12i $a0, %ie_pc_hi20(sym)
 * ld.d  $a0, $a0, %ie_pc_lo12(sym)

The code sequence converted is as follows:
 * lu12i.w   $a0, %ie_pc_hi20(sym)  # le_hi20 != 0, otherwise NOP
 * ori       $a0, $a0, %ie_pc_lo12(sym)

FIXME: When relaxation is enabled, redundant NOPs can be removed. This will
be implemented in a future patch.

Note: In the normal or medium code model, original code sequence with
relocations can appear interleaved, because converted code sequence
calculates the absolute offset. However, in extreme code model, to
identify the current code model, the first four instructions with
relocations must appear consecutively.
---
 lld/ELF/Arch/LoongArch.cpp | 87 ++
 lld/ELF/Relocations.cpp| 15 ++-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ec09437404eddc..dc98dbec872c0c 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo {
   void relocate(uint8_t *loc, const Relocation &rel,
 uint64_t val) const override;
   bool relaxOnce(int pass) const override;
+  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   void finalizeRelax(int passes) const override;
+
+private:
+  void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
 };
 } // end anonymous namespace
 
@@ -53,6 +57,8 @@ enum Op {
   ADDI_W = 0x0280,
   ADDI_D = 0x02c0,
   ANDI = 0x0340,
+  ORI = 0x0380,
+  LU12I_W = 0x1400,
   PCADDI = 0x1800,
   PCADDU12I = 0x1c00,
   LD_W = 0x2880,
@@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   return changed;
 }
 
+// Convert TLS IE to LE in the normal or medium code model.
+// Original code sequence:
+//  * pcalau12i $a0, %ie_pc_hi20(sym)
+//  * ld.d  $a0, $a0, %ie_pc_lo12(sym)
+//
+// The code sequence converted is as follows:
+//  * lu12i.w   $a0, %le_hi20(sym)  # le_hi20 != 0, otherwise NOP
+//  * ori $a0   $a0, %le_lo12(sym)
+//
+// When relaxation enables, redundant NOPs can be removed.
+void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel,
+  uint64_t val) const {
+  assert(isInt<32>(val) &&
+ "val exceeds the range of medium code model in tlsIeToLe");
+
+  bool isUInt12 = isUInt<12>(val);
+  const uint32_t currInsn = read32le(loc);
+  switch (rel.type) {
+  case R_LARCH_TLS_IE_PC_HI20:
+if (isUInt12)
+  write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
+else
+  write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
+  0)); // lu12i.w $a0, %le_hi20
+break;
+  case R_LARCH_TLS_IE_PC_LO12:
+if (isUInt12)
+  write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
+  val)); // ori $a0, $r0, %le_lo12
+else
+  write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
+  lo12(val))); // ori $a0, $a0, %le_lo12
+break;
+  }
+}
+
+void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
+  const unsigned bits = ctx.arg.is64 ? 64 : 32;
+  uint64_t secAddr = sec.getOutputSection()->addr;
+  if (auto *s = dyn_cast(&sec))
+secAddr += s->outSecOff;
+  else if (auto *ehIn = dyn_cast(&sec))
+secAddr += ehIn->getParent()->outSecOff;
+  bool isExtreme = false;
+  const MutableArrayRef relocs = sec.relocs();
+  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+Relocation &rel = relocs[i];
+uint8_t *loc = buf + rel.offset;
+uint64_t val = SignExtend64(
+sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
+
+switch (rel.expr) {
+case R_RELAX_HINT:
+  continue;
+case R_RELAX_TLS_IE_TO_LE:
+  if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
+// LoongArch does not support IE to LE optimize in the extreme code
+// model. In this case, the relocs are as follows:
+//
+//  * i   -- R_LARCH_TLS_IE_PC_HI20
+//  * i+1 -- R_LARCH_TLS_IE_PC_LO12
+//  * i+2 -- R_LARCH_TLS_IE64_PC_LO20
+//  * i+3 -- R_LARCH_TLS_IE64_PC_HI12
+isExtreme =
+(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20);
+  }
+  if (isExtreme) {
+rel.expr = getRelExpr(rel.type, *rel.sym, loc);
+val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + 
rel.offset),
+   bits);
+relocateNoSym(loc, rel.type, val);
+  } else
+tlsIeToLe(loc, rel, val);
+  continue;
+default:
+  break;
+}
+relocate(loc, rel, val);
+  }
+}
+
 // 

[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx converted_to_draft 
https://github.com/llvm/llvm-project/pull/123680
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx ready_for_review 
https://github.com/llvm/llvm-project/pull/123680
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)

2025-01-20 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: undef deprecator found issues in your code. :warning:



You can test this locally with the following command:


``bash
git diff -U0 --pickaxe-regex -S 
'([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 
2d8035aff3d44bd59f4ff3af60f87c7d6e6219ea 
c5caf560857f3c4f71416940a528df5ce75212bc 
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll 
llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll 
llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll 
llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll
``




The following files introduce new uses of undef:
 - llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated 
and should only be used in the rare cases where no replacement is possible. For 
example, a load of uninitialized memory yields `undef`. You should use `poison` 
values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. 
If you need an operand with some unimportant value, you can add a new argument 
to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {
  ...
  br i1 undef, ...
}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
```

Please refer to the [Undefined Behavior 
Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.



https://github.com/llvm/llvm-project/pull/123684
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/123680

Original code sequence:
* pcalau12i $a0, %ie_pc_hi20(sym)
* ld.d  $a0, $a0, %ie_pc_lo12(sym)

The code sequence converted is as follows:
* lu12i.w   $a0, %ie_pc_hi20(sym)  # le_hi20 != 0, otherwise NOP
* ori       $a0, $a0, %ie_pc_lo12(sym)

FIXME: When relaxation is enabled, redundant NOPs can be removed. This will be 
implemented in a future patch.

Note: In the normal or medium code model, the original code sequence with 
relocations may appear interleaved, because the converted code sequence 
calculates the absolute offset. However, in the extreme code model, to identify 
the current code model, the first four instructions with relocations must appear 
consecutively.

>From a39c190e5c8351227178b6e5041bbd97fc6926a9 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Thu, 2 Jan 2025 20:58:56 +0800
Subject: [PATCH 1/4] Convert TLS IE to LE in the normal or medium code model.

Original code sequence:
 * pcalau12i $a0, %ie_pc_hi20(sym)
 * ld.d  $a0, $a0, %ie_pc_lo12(sym)

The code sequence converted is as follows:
 * lu12i.w   $a0, %ie_pc_hi20(sym)  # le_hi20 != 0, otherwise NOP
 * ori       $a0, $a0, %ie_pc_lo12(sym)

FIXME: When relaxation is enabled, redundant NOPs can be removed. This will
be implemented in a future patch.

Note: In the normal or medium code model, original code sequence with
relocations can appear interleaved, because converted code sequence
calculates the absolute offset. However, in extreme code model, to
identify the current code model, the first four instructions with
relocations must appear consecutively.
---
 lld/ELF/Arch/LoongArch.cpp | 87 ++
 lld/ELF/Relocations.cpp| 15 ++-
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ec09437404eddc..dc98dbec872c0c 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo {
   void relocate(uint8_t *loc, const Relocation &rel,
 uint64_t val) const override;
   bool relaxOnce(int pass) const override;
+  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   void finalizeRelax(int passes) const override;
+
+private:
+  void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
 };
 } // end anonymous namespace
 
@@ -53,6 +57,8 @@ enum Op {
   ADDI_W = 0x0280,
   ADDI_D = 0x02c0,
   ANDI = 0x0340,
+  ORI = 0x0380,
+  LU12I_W = 0x1400,
   PCADDI = 0x1800,
   PCADDU12I = 0x1c00,
   LD_W = 0x2880,
@@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   return changed;
 }
 
+// Convert TLS IE to LE in the normal or medium code model.
+// Original code sequence:
+//  * pcalau12i $a0, %ie_pc_hi20(sym)
+//  * ld.d  $a0, $a0, %ie_pc_lo12(sym)
+//
+// The code sequence converted is as follows:
+//  * lu12i.w   $a0, %le_hi20(sym)  # le_hi20 != 0, otherwise NOP
+//  * ori $a0   $a0, %le_lo12(sym)
+//
+// When relaxation enables, redundant NOPs can be removed.
+void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel,
+  uint64_t val) const {
+  assert(isInt<32>(val) &&
+ "val exceeds the range of medium code model in tlsIeToLe");
+
+  bool isUInt12 = isUInt<12>(val);
+  const uint32_t currInsn = read32le(loc);
+  switch (rel.type) {
+  case R_LARCH_TLS_IE_PC_HI20:
+if (isUInt12)
+  write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
+else
+  write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
+  0)); // lu12i.w $a0, %le_hi20
+break;
+  case R_LARCH_TLS_IE_PC_LO12:
+if (isUInt12)
+  write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
+  val)); // ori $a0, $r0, %le_lo12
+else
+  write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
+  lo12(val))); // ori $a0, $a0, %le_lo12
+break;
+  }
+}
+
+void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
+  const unsigned bits = ctx.arg.is64 ? 64 : 32;
+  uint64_t secAddr = sec.getOutputSection()->addr;
+  if (auto *s = dyn_cast(&sec))
+secAddr += s->outSecOff;
+  else if (auto *ehIn = dyn_cast(&sec))
+secAddr += ehIn->getParent()->outSecOff;
+  bool isExtreme = false;
+  const MutableArrayRef relocs = sec.relocs();
+  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+Relocation &rel = relocs[i];
+uint8_t *loc = buf + rel.offset;
+uint64_t val = SignExtend64(
+sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
+
+switch (rel.expr) {
+case R_RELAX_HINT:
+  continue;
+case R_RELAX_TLS_IE_TO_LE:
+  if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
+// LoongArch does not support IE to LE optimize in the extreme code
+// model. In this case, the relocs are as follows:
+//
+/

[llvm-branch-commits] [lld] [lld][LoongArch] Convert TLS IE to LE in the normal or medium code model. (PR #123680)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)


Changes

Original code sequence:
* pcalau12i $a0, %ie_pc_hi20(sym)
* ld.d  $a0, $a0, %ie_pc_lo12(sym)

The code sequence converted is as follows:
* lu12i.w   $a0, %ie_pc_hi20(sym)  # le_hi20 != 0, otherwise NOP
* ori       $a0, $a0, %ie_pc_lo12(sym)

FIXME: When relaxation is enabled, the redundant NOP can be removed. This will 
be implemented in a future patch.

Note: In the normal or medium code model, the original code sequence with 
relocations may be interleaved with other instructions, because the converted 
code sequence calculates the absolute offset. However, in the extreme code 
model, the first four instructions with relocations must appear consecutively 
so that the current code model can be identified.

---
Full diff: https://github.com/llvm/llvm-project/pull/123680.diff


4 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+87) 
- (modified) lld/ELF/Relocations.cpp (+15-1) 
- (added) lld/test/ELF/loongarch-relax-tls-ie.s (+70) 
- (modified) lld/test/ELF/loongarch-tls-ie.s (+12-18) 


``diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ec09437404eddc..dc98dbec872c0c 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -39,7 +39,11 @@ class LoongArch final : public TargetInfo {
   void relocate(uint8_t *loc, const Relocation &rel,
 uint64_t val) const override;
   bool relaxOnce(int pass) const override;
+  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   void finalizeRelax(int passes) const override;
+
+private:
+  void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
 };
 } // end anonymous namespace
 
@@ -53,6 +57,8 @@ enum Op {
   ADDI_W = 0x0280,
   ADDI_D = 0x02c0,
   ANDI = 0x0340,
+  ORI = 0x0380,
+  LU12I_W = 0x1400,
   PCADDI = 0x1800,
   PCADDU12I = 0x1c00,
   LD_W = 0x2880,
@@ -1000,6 +1006,87 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   return changed;
 }
 
+// Convert TLS IE to LE in the normal or medium code model.
+// Original code sequence:
+//  * pcalau12i $a0, %ie_pc_hi20(sym)
+//  * ld.d  $a0, $a0, %ie_pc_lo12(sym)
+//
+// The code sequence converted is as follows:
+//  * lu12i.w   $a0, %le_hi20(sym)  # le_hi20 != 0, otherwise NOP
+//  * ori $a0   $a0, %le_lo12(sym)
+//
+// When relaxation enables, redundant NOPs can be removed.
+void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel,
+  uint64_t val) const {
+  assert(isInt<32>(val) &&
+ "val exceeds the range of medium code model in tlsIeToLe");
+
+  bool isUInt12 = isUInt<12>(val);
+  const uint32_t currInsn = read32le(loc);
+  switch (rel.type) {
+  case R_LARCH_TLS_IE_PC_HI20:
+if (isUInt12)
+  write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
+else
+  write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
+  0)); // lu12i.w $a0, %le_hi20
+break;
+  case R_LARCH_TLS_IE_PC_LO12:
+if (isUInt12)
+  write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
+  val)); // ori $a0, $r0, %le_lo12
+else
+  write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
+  lo12(val))); // ori $a0, $a0, %le_lo12
+break;
+  }
+}
+
+void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
+  const unsigned bits = ctx.arg.is64 ? 64 : 32;
+  uint64_t secAddr = sec.getOutputSection()->addr;
+  if (auto *s = dyn_cast(&sec))
+secAddr += s->outSecOff;
+  else if (auto *ehIn = dyn_cast(&sec))
+secAddr += ehIn->getParent()->outSecOff;
+  bool isExtreme = false;
+  const MutableArrayRef relocs = sec.relocs();
+  for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+Relocation &rel = relocs[i];
+uint8_t *loc = buf + rel.offset;
+uint64_t val = SignExtend64(
+sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
+
+switch (rel.expr) {
+case R_RELAX_HINT:
+  continue;
+case R_RELAX_TLS_IE_TO_LE:
+  if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
+// LoongArch does not support IE to LE optimize in the extreme code
+// model. In this case, the relocs are as follows:
+//
+//  * i   -- R_LARCH_TLS_IE_PC_HI20
+//  * i+1 -- R_LARCH_TLS_IE_PC_LO12
+//  * i+2 -- R_LARCH_TLS_IE64_PC_LO20
+//  * i+3 -- R_LARCH_TLS_IE64_PC_HI12
+isExtreme =
+(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20);
+  }
+  if (isExtreme) {
+rel.expr = getRelExpr(rel.type, *rel.sym, loc);
+val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + 
rel.offset),
+   bits);
+relocateNoSym(loc, rel.type, val);
+  } else
+tlsIeToLe(loc, rel, val);
+  continue;
+default:
+  break;
+}
+relocate(loc, rel, val);
+  }
+}
+
 // When relaxing just R_LARCH_ALIGN, relocDeltas is usually

[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

For VALU shuffles, this saves an instruction in some cases.

---

Patch is 285.82 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123684.diff


19 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+114) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+7) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll (+21-28) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll (+17-23) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll (+34-50) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll (+112-160) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll (+21-28) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll (+17-23) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll (+34-50) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll (+112-160) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll (+21-28) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll (+17-23) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll (+34-50) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll (+112-160) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll (+500-287) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll (+500-287) 
- (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+48-48) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll 
(+1-2) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6d5c3b5e0742b3..8d03fde8911242 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -489,6 +489,90 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, 
unsigned RegClassID) {
   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
 }
 
+void AMDGPUDAGToDAGISel::SelectVectorShuffle(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+
+  // TODO: Handle 16-bit element vectors with even aligned masks.
+  if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
+  VT.getVectorNumElements() != 2) {
+SelectCode(N);
+return;
+  }
+
+  auto *SVN = cast(N);
+
+  SDValue Src0 = SVN->getOperand(0);
+  SDValue Src1 = SVN->getOperand(1);
+  ArrayRef Mask = SVN->getMask();
+  SDLoc DL(N);
+
+  assert(Src0.getValueType().getVectorNumElements() == 2 && Mask.size() == 2 &&
+ Mask[0] < 4 && Mask[1] < 4);
+
+  SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
+  SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
+  unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
+  unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
+
+  if (Mask[0] < 0) {
+Src0SubReg = Src1SubReg;
+MachineSDNode *ImpDef =
+CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+VSrc0 = SDValue(ImpDef, 0);
+  }
+
+  if (Mask[1] < 0) {
+Src1SubReg = Src0SubReg;
+MachineSDNode *ImpDef =
+CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+VSrc1 = SDValue(ImpDef, 0);
+  }
+
+  // SGPR case needs to lower to copies.
+  //
+  // Also use subregister extract when we can directly blend the registers with
+  // a simple subregister copy.
+  //
+  // TODO: Maybe we should fold this out earlier
+  if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
+  Src1SubReg == AMDGPU::sub0) {
+// The low element of the result always comes from src0.
+// The high element of the result always comes from src1.
+// op_sel selects the high half of src0.
+// op_sel_hi selects the high half of src1.
+
+unsigned Src0OpSel =
+Src0SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
+unsigned Src1OpSel =
+Src1SubReg == AMDGPU::sub1 ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
+
+SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);
+SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);
+SDValue ZeroMods = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),
+ {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
+  ZeroMods,   // clamp
+  ZeroMods,   // op_sel
+  ZeroMods,   // op_sel_hi
+  ZeroMods,   // neg_lo
+  ZeroMods}); // neg_hi
+return;
+  }
+
+  SDValue ResultElt0 =
+  CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);
+  SDValue ResultElt1 =
+  CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);
+
+  const SDValu

[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)

2025-01-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#123684** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/123684?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#123596** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/123596?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/123684
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Make vector_shuffle legal for v2i32 with v_pk_mov_b32 (PR #123684)

2025-01-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/123684
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [Flang] Optionally do not compile the runtime in-tree (PR #122336)

2025-01-20 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/122336

>From dd3ac2e6d8d8d57cd639c25bea3b8d5c99a2f81e Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Thu, 9 Jan 2025 15:58:48 +0100
Subject: [PATCH 1/9] Introduce FLANG_INCLUDE_RUNTIME

---
 flang/CMakeLists.txt|  7 +++-
 flang/test/CMakeLists.txt   |  6 +++-
 flang/test/Driver/ctofortran.f90|  1 +
 flang/test/Driver/exec.f90  |  1 +
 flang/test/Runtime/no-cpp-dep.c |  2 +-
 flang/test/lit.cfg.py   |  5 ++-
 flang/test/lit.site.cfg.py.in   |  1 +
 flang/tools/f18/CMakeLists.txt  |  4 +--
 flang/unittests/CMakeLists.txt  |  6 ++--
 flang/unittests/Evaluate/CMakeLists.txt | 46 ++---
 10 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 68947eaa9c9bd7..69e963a43d0b97 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -233,6 +233,9 @@ else()
   include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR})
 endif()
 
+option(FLANG_INCLUDE_RUNTIME "Build the runtime in-tree (deprecated; to be 
replaced with LLVM_ENABLE_RUNTIMES=flang-rt)" ON)
+pythonize_bool(FLANG_INCLUDE_RUNTIME)
+
 set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH
 "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')")
 mark_as_advanced(FLANG_TOOLS_INSTALL_DIR)
@@ -473,7 +476,9 @@ if (FLANG_CUF_RUNTIME)
   find_package(CUDAToolkit REQUIRED)
 endif()
 
-add_subdirectory(runtime)
+if (FLANG_INCLUDE_RUNTIME)
+  add_subdirectory(runtime)
+endif ()
 
 if (LLVM_INCLUDE_EXAMPLES)
   add_subdirectory(examples)
diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt
index cab214c2ef4c8c..e398e0786147aa 100644
--- a/flang/test/CMakeLists.txt
+++ b/flang/test/CMakeLists.txt
@@ -71,9 +71,13 @@ set(FLANG_TEST_DEPENDS
   llvm-objdump
   llvm-readobj
   split-file
-  FortranRuntime
   FortranDecimal
 )
+
+if (FLANG_INCLUDE_RUNTIME)
+  list(APPEND FLANG_TEST_DEPENDS FortranRuntime)
+endif ()
+
 if (LLVM_ENABLE_PLUGINS AND NOT WIN32)
   list(APPEND FLANG_TEST_DEPENDS Bye)
 endif()
diff --git a/flang/test/Driver/ctofortran.f90 b/flang/test/Driver/ctofortran.f90
index 78eac32133b18e..10c7adaccc9588 100644
--- a/flang/test/Driver/ctofortran.f90
+++ b/flang/test/Driver/ctofortran.f90
@@ -1,4 +1,5 @@
 ! UNSUPPORTED: system-windows
+! REQUIRES: flang-rt
 ! RUN: split-file %s %t
 ! RUN: chmod +x %t/runtest.sh
 ! RUN: %t/runtest.sh %t %t/ffile.f90 %t/cfile.c %flang | FileCheck %s
diff --git a/flang/test/Driver/exec.f90 b/flang/test/Driver/exec.f90
index fd174005ddf62a..9ca91ee24011c9 100644
--- a/flang/test/Driver/exec.f90
+++ b/flang/test/Driver/exec.f90
@@ -1,4 +1,5 @@
 ! UNSUPPORTED: system-windows
+! REQUIRES: flang-rt
 ! Verify that flang can correctly build executables.
 
 ! RUN: %flang %s -o %t
diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c
index b1a5fa004014cc..7303ce63fdec41 100644
--- a/flang/test/Runtime/no-cpp-dep.c
+++ b/flang/test/Runtime/no-cpp-dep.c
@@ -3,7 +3,7 @@ This test makes sure that flang's runtime does not depend on 
the C++ runtime
 library. It tries to link this simple file against libFortranRuntime.a with
 a C compiler.
 
-REQUIRES: c-compiler
+REQUIRES: c-compiler, flang-rt
 
 RUN: %if system-aix %{ export OBJECT_MODE=64 %}
 RUN: %cc -std=c99 %s -I%include %libruntime -lm  \
diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py
index c452b6d231c89f..78378bf5f413e8 100644
--- a/flang/test/lit.cfg.py
+++ b/flang/test/lit.cfg.py
@@ -163,10 +163,13 @@
 ToolSubst("%not_todo_abort_cmd", command=FindTool("not"), 
unresolved="fatal")
 )
 
+if config.flang_include_runtime:
+  config.available_features.add("flang-rt")
+
 # Define some variables to help us test that the flang runtime doesn't depend 
on
 # the C++ runtime libraries. For this we need a C compiler. If for some reason
 # we don't have one, we can just disable the test.
-if config.cc:
+if config.flang_include_runtime and config.cc:
 libruntime = os.path.join(config.flang_lib_dir, "libFortranRuntime.a")
 include = os.path.join(config.flang_src_dir, "include")
 
diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in
index d1a0ac763cf8a0..19f9330f93ae14 100644
--- a/flang/test/lit.site.cfg.py.in
+++ b/flang/test/lit.site.cfg.py.in
@@ -32,6 +32,7 @@ else:
 config.openmp_module_dir = None
 config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@"
 config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@"
+config.flang_include_runtime = @FLANG_INCLUDE_RUNTIME@
 
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 4362fcf0537616..022c346aabdbde 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -72,7 +72,7 @@ if (NOT CMAKE_CROSSCOMPILING)
   set(depends ${FLANG_

[llvm-branch-commits] [flang] [Flang] Promote FortranEvaluateTesting library (PR #122334)

2025-01-20 Thread Michael Kruse via llvm-branch-commits


@@ -1,47 +1,34 @@
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-add_library(FortranEvaluateTesting

Meinersbur wrote:

@jeanPerier I added the change to #122336: 
8732b2771bd422b939da120c5425b82798ee95f2

Thanks.

https://github.com/llvm/llvm-project/pull/122334
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang-RT] Build libflang_rt.so (PR #121782)

2025-01-20 Thread Michael Kruse via llvm-branch-commits


@@ -8,6 +8,12 @@
 
 macro(enable_cuda_compilation name files)
   if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
+if (NOT FLANG_RT_ENABLE_STATIC)
+  message(FATAL_ERROR
+"FLANG_RT_ENABLE_STATIC is required for CUDA build of Flang-RT"
+)
+endif()
+
 enable_language(CUDA)
 
 set_target_properties(${name}

Meinersbur wrote:

Should be fixed in 0450959120e5ebae63b1e26b8232e7193c403099. Noticed myself 
when testing all the build configurations. Unfortunately, ALIAS targets do not 
solve everything: they cannot be used to modify a target.

https://github.com/llvm/llvm-project/pull/121782
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] ddb9cc5 - Revert "Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero in…"

2025-01-20 Thread via llvm-branch-commits

Author: Kiran Chandramohan
Date: 2025-01-20T12:43:24Z
New Revision: ddb9cc530ec0667c3b4207d13bc6ec26f64241d4

URL: 
https://github.com/llvm/llvm-project/commit/ddb9cc530ec0667c3b4207d13bc6ec26f64241d4
DIFF: 
https://github.com/llvm/llvm-project/commit/ddb9cc530ec0667c3b4207d13bc6ec26f64241d4.diff

LOG: Revert "Revert "Revert "Revert "[Flang][Driver] Add a flag to control zero 
in…"

This reverts commit 8a229f595a5c0ff354cdfa05cda974a9d56674df.

Added: 
flang/test/Driver/fno-zero-init.f90
flang/test/Lower/zero_init.f90
flang/test/Lower/zero_init_default_init.f90

Modified: 
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Flang.cpp
flang/include/flang/Lower/LoweringOptions.def
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Lower/ConvertVariable.cpp
flang/tools/bbc/bbc.cpp

Removed: 




diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index d38dd2b4e3cf09..c4b9743597bb2e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3505,6 +3505,11 @@ def fno_struct_path_tbaa : Flag<["-"], 
"fno-struct-path-tbaa">, Group;
 def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group;
 def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group,
   Visibility<[ClangOption, FlangOption]>;
+defm init_global_zero : BoolOptionWithoutMarshalling<"f", "init-global-zero",
+  PosFlag,
+  NegFlag>;
 def fno_pointer_tbaa : Flag<["-"], "fno-pointer-tbaa">, Group;
 def fno_temp_file : Flag<["-"], "fno-temp-file">, Group,
   Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, HelpText<

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 86ed25badfa2b7..9c1fd28a3a8a26 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -155,8 +155,10 @@ void Flang::addCodegenOptions(const ArgList &Args,
options::OPT_flang_deprecated_no_hlfir,
options::OPT_fno_ppc_native_vec_elem_order,
options::OPT_fppc_native_vec_elem_order,
-   options::OPT_ftime_report, options::OPT_ftime_report_EQ,
-   options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
+   options::OPT_finit_global_zero,
+   options::OPT_fno_init_global_zero, 
options::OPT_ftime_report,
+   options::OPT_ftime_report_EQ, options::OPT_funroll_loops,
+   options::OPT_fno_unroll_loops});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {

diff  --git a/flang/include/flang/Lower/LoweringOptions.def 
b/flang/include/flang/Lower/LoweringOptions.def
index 5a6debfdffe030..396c91948be36b 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -44,5 +44,8 @@ ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0)
 /// If false, assume that the shapes/types/allocation-status match.
 ENUM_LOWERINGOPT(ReallocateLHS, unsigned, 1, 1)
 
+/// If true, initialize globals without initialization to zero.
+/// On by default.
+ENUM_LOWERINGOPT(InitGlobalZero, unsigned, 1, 1)
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 15b1e1e0a24881..3c6da4687f65d3 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1377,6 +1377,14 @@ bool CompilerInvocation::createFromArgs(
 invoc.loweringOpts.setNoPPCNativeVecElemOrder(true);
   }
 
+  // -f[no-]init-global-zero
+  if (args.hasFlag(clang::driver::options::OPT_finit_global_zero,
+   clang::driver::options::OPT_fno_init_global_zero,
+   /*default=*/true))
+invoc.loweringOpts.setInitGlobalZero(true);
+  else
+invoc.loweringOpts.setInitGlobalZero(false);
+
   // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or
   // -Rpass-analysis. This will be used later when processing and outputting 
the
   // remarks generated by LLVM in ExecuteCompilerInvocation.cpp.

diff  --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 9ee42d5cd88002..87236dc293ebbc 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -635,7 +635,11 @@ static fir::GlobalOp 
defineGlobal(Fortran::lower::AbstractConverter &converter,
   global.setLinkName(builder.createCommonLinkage());
 Fortran::lower::createGlobalInitialization(
 builder, global, [&](fir::FirOpBuilder &builder) {
-  mlir::Value initValue = builder.create(loc, symTy);
+  mlir::Value initValue;
+  if (converter.getLoweringOptions().getInitGlobalZero())
+initValue = builder.create(loc, symTy);
+  else
+ 

[llvm-branch-commits] [clang] [Driver] Change linker job in Baremetal toolchain object accomodate GCCInstallation.(2/3) (PR #121830)

2025-01-20 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/121830

>From 8fa748d05c4f3464427df5cc117c196a5006e5a9 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 6 Jan 2025 09:21:11 -0800
Subject: [PATCH] [RISCV] Change linker job in Baremetal toolchain object to
 accomodate valid GCCInstallation.(2/3)

This patch adds the defaults for CXXStdlib type and other runtime libs.
Additionally, this patch also modifies the linker job and extends it to support
a valid GCCInstallation.

This PR preserves the behavior of both toolchain objects in the linker job, with
the only new change being that the presence of `--gcc-toolchain` or
`--gcc-install-dir` will imply that the GNU linker is the default linker, unless
a different linker is passed through the `-fuse-ld` flag.

This is the second PR in the series of 3 PRs for merging and extending Baremetal
toolchain object. The division of the PRs is as follows:
- Teach Baremetal toolchain about GCC installation and make sysroot- and
  assembler-related changes.
- Changes related to linker job and defaults for CXXStdlib and other runtime
  libs.
- Finally removing the call to RISCVToolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: I8fdb3490a3888001b1bb999e7ee8df90a187d18d
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   | 110 +++-
 clang/lib/Driver/ToolChains/BareMetal.h |  20 ++--
 clang/test/Driver/aarch64-toolchain-extra.c |   6 ++
 clang/test/Driver/aarch64-toolchain.c   |  73 +
 clang/test/Driver/arm-toolchain-extra.c |   7 +-
 clang/test/Driver/arm-toolchain.c   |  77 +-
 clang/test/Driver/baremetal-multilib.yaml   |   2 +-
 clang/test/Driver/baremetal-sysroot.cpp |   2 +-
 clang/test/Driver/baremetal.cpp |  77 +++---
 clang/test/Driver/sanitizer-ld.c|   2 +-
 10 files changed, 298 insertions(+), 78 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 7b0f2bc2fd3895..38de25e20ea8df 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -172,6 +172,8 @@ BareMetal::BareMetal(const Driver &D, const llvm::Triple 
&Triple,
 : Generic_ELF(D, Triple, Args) {
   GCCInstallation.init(Triple, Args);
   SysRoot = computeSysRoot();
+  UseLD =
+  Args.getLastArgValue(options::OPT_fuse_ld_EQ).equals_insensitive("ld");
   if (GCCInstallation.isValid()) {
 Multilibs = GCCInstallation.getMultilibs();
 SelectedMultilibs.assign({GCCInstallation.getMultilib()});
@@ -342,6 +344,32 @@ BareMetal::OrderedMultilibs 
BareMetal::getOrderedMultilibs() const {
   return llvm::reverse(Default);
 }
 
+ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const {
+  if (getTriple().isRISCV() && GCCInstallation.isValid())
+return ToolChain::CST_Libstdcxx;
+  return ToolChain::CST_Libcxx;
+}
+
+ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const {
+  if (getTriple().isRISCV() && GCCInstallation.isValid())
+return ToolChain::RLT_Libgcc;
+  return ToolChain::RLT_CompilerRT;
+}
+
+ToolChain::UnwindLibType
+BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const {
+  if (getTriple().isRISCV())
+return ToolChain::UNW_None;
+
+  return ToolChain::GetUnwindLibType(Args);
+}
+
+const char *BareMetal::getDefaultLinker() const {
+  if (isUsingLD())
+return "ld";
+  return "ld.lld";
+}
+
 void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
   ArgStringList &CC1Args) const {
   if (DriverArgs.hasArg(options::OPT_nostdinc))
@@ -535,12 +563,21 @@ void baremetal::Linker::ConstructJob(Compilation &C, 
const JobAction &JA,
   const llvm::Triple::ArchType Arch = TC.getArch();
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
-  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
+  if (!D.SysRoot.empty())
+CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
 
   CmdArgs.push_back("-Bstatic");
 
-  if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
-CmdArgs.push_back("--no-relax");
+  if (Triple.isRISCV()) {
+if (Args.hasArg(options::OPT_mno_relax))
+  CmdArgs.push_back("--no-relax");
+if (TC.isUsingLD()) {
+  CmdArgs.push_back("-m");
+  CmdArgs.push_back(Arch == llvm::Triple::riscv64 ? "elf64lriscv"
+  : "elf32lriscv");
+}
+CmdArgs.push_back("-X");
+  }
 
   if (Triple.isARM() || Triple.isThumb()) {
 bool IsBigEndian = arm::isARMBigEndian(Triple, Args);
@@ -551,19 +588,54 @@ void baremetal::Linker::ConstructJob(Compilation &C, 
const JobAction &JA,
 CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL");
   }
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
-   options::OPT_r)) {
-CmdArgs.pu

[llvm-branch-commits] [clang] [Driver][RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object.(3/3) (PR #121831)

2025-01-20 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/121831

>From 87abf1b761051e3f2cd22d64ac8104bb6a5412ab Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 6 Jan 2025 10:05:08 -0800
Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object
 and deprecate RISCVToolchain object.(3/3)

This PR is the last in the series merging the RISCVToolchain object into the
BareMetal toolchain object. This PR makes the change to call the BareMetal
toolchain object for riscv targets as well and removes the RISCVToolChain object.

The division of the PRs is as follows:
- Teach Baremetal toolchain about GCC installation and make sysroot- and
  assembler-related changes.
- Changes related to linker job and defaults for CXXStdlib and other runtime
  libs.
- Finally removing the call to RISCVToolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: I2877ac328f55a7638cc185d6034866cbd2ac4203
---
 clang/lib/Driver/CMakeLists.txt |  1 -
 clang/lib/Driver/Driver.cpp |  7 +-
 clang/test/Driver/riscv-args.c  |  2 +-
 clang/test/Driver/riscv32-toolchain-extra.c |  7 +++---
 clang/test/Driver/riscv32-toolchain.c   | 26 ++---
 clang/test/Driver/riscv64-toolchain-extra.c |  7 +++---
 clang/test/Driver/riscv64-toolchain.c   | 20 
 7 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
index 5bdb6614389cff..eee29af5d181a1 100644
--- a/clang/lib/Driver/CMakeLists.txt
+++ b/clang/lib/Driver/CMakeLists.txt
@@ -74,7 +74,6 @@ add_clang_library(clangDriver
   ToolChains/OHOS.cpp
   ToolChains/OpenBSD.cpp
   ToolChains/PS4CPU.cpp
-  ToolChains/RISCVToolchain.cpp
   ToolChains/Solaris.cpp
   ToolChains/SPIRV.cpp
   ToolChains/SPIRVOpenMP.cpp
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 36d6c93c43321f..4dda9a34b08d99 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -41,7 +41,6 @@
 #include "ToolChains/PPCFreeBSD.h"
 #include "ToolChains/PPCLinux.h"
 #include "ToolChains/PS4CPU.h"
-#include "ToolChains/RISCVToolchain.h"
 #include "ToolChains/SPIRV.h"
 #include "ToolChains/SPIRVOpenMP.h"
 #include "ToolChains/SYCL.h"
@@ -6665,11 +6664,7 @@ const ToolChain &Driver::getToolChain(const ArgList 
&Args,
 break;
   case llvm::Triple::riscv32:
   case llvm::Triple::riscv64:
-if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args))
-  TC =
-  std::make_unique(*this, Target, 
Args);
-else
-  TC = std::make_unique(*this, Target, Args);
+TC = std::make_unique(*this, Target, Args);
 break;
   case llvm::Triple::ve:
 TC = std::make_unique(*this, Target, Args);
diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c
index cab08e5b0f811e..fc35407baf2cc6 100644
--- a/clang/test/Driver/riscv-args.c
+++ b/clang/test/Driver/riscv-args.c
@@ -3,4 +3,4 @@
 // Make sure -T is the last with gcc-toolchain option
 // RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 
-T a.lds -u foo %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-LD %s
-// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds"
+// CHECK-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10"
diff --git a/clang/test/Driver/riscv32-toolchain-extra.c 
b/clang/test/Driver/riscv32-toolchain-extra.c
index cbb3c23ebb3421..420f7b52036090 100644
--- a/clang/test/Driver/riscv32-toolchain-extra.c
+++ b/clang/test/Driver/riscv32-toolchain-extra.c
@@ -18,12 +18,12 @@
 // RUN: ln -s %S/Inputs/basic_riscv32_nogcc_tree/riscv32-unknown-elf 
%t/riscv32-nogcc/riscv32-unknown-elf
 // RUN: %t/riscv32-nogcc/bin/clang %s -### -no-canonical-prefixes \
 // RUN:--gcc-toolchain=%t/riscv32-nogcc/invalid \
-// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld= 2>&1 \
+// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld=ld 2>&1 \
 // RUN:| FileCheck -check-prefix=C-RV32-BAREMETAL-ILP32-NOGCC %s
 
 // RUN: %t/riscv32-nogcc/bin/clang %s -### -no-canonical-prefixes \
 // RUN:--sysroot=%t/riscv32-nogcc/bin/../riscv32-unknown-elf \
-// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld= 2>&1 \
+// RUN:--target=riscv32-unknown-elf --rtlib=platform -fuse-ld=ld 2>&1 \
 // RUN:| FileCheck -check-prefix=C-RV32-BAREMETAL-ILP32-NOGCC %s
 
 // C-RV32-BAREMETAL-ILP32-NOGCC: "-internal-isystem" 
"{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/include"
@@ -31,6 +31,5 @@
 // C-RV32-BAREMETAL-ILP32-NOGCC: 
"{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib/crt0.o"
 // C-RV32-BAREMETAL-ILP32-NOGCC: 
"{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtbegin.o"
 // C-RV32-BAREMETAL-ILP32-NOGCC: 
"{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib"
-// C-RV32-BAREMETAL-ILP32-NOGCC: "--start-group" "-lc" "

[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD. (PR #123600)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/123600

In local-exec form, the code sequence is converted as follows:
```
From:
  lu12i.w $rd, %le_hi20_r(sym)
R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
  add.w/d $rd, $rd, $tp, %le_add_r(sym)
R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
  addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
  addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
R_LARCH_TLS_LE_LO12_R
``` 

In global-dynamic or local-dynamic, the code sequence is converted as follows:
```
From:
  pcalau12i $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX
  addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2
``` 
Note: For initial-exec form, since it involves the conversion from IE to LE, we 
will implement it in a future patch.

>From 7993434e2973437b010034051003f8c03d8eff71 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Fri, 27 Dec 2024 19:29:32 +0800
Subject: [PATCH 1/5] Relax TLS LE/GD/LD.

In local-exec form, the code sequence is converted as follows:
```
From:
lu12i.w $rd, %le_hi20_r(sym)
  R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
add.w/d $rd, $rd, $tp, %le_add_r(sym)
  R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
  R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
  R_LARCH_TLS_LE_LO12_R
```

In global-dynamic or local-dynamic, the code sequence is converted as
follows:
```
From:
pcalau12i $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
  R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX
addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
  R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
pcaddi$a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
  R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2
```

Note: For initial-exec form, since it involves the conversion from IE to
LE, we will implement it in a future patch.
---
 lld/ELF/Arch/LoongArch.cpp | 68 +++---
 1 file changed, 64 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, 
size_t i) {
 
 // Relax code sequence.
 // From:
-//   pcalau12i $a0, %pc_hi20(sym)
-//   addi.w/d $a0, $a0, %pc_lo12(sym)
+//   pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
+//   addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 // To:
-//   pcaddi $a0, %pc_lo12(sym)
+//   pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 //
 // From:
 //   pcalau12i $a0, %got_pc_hi20(sym_got)
@@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
  rLo12.type == R_LARCH_PCALA_LO12) ||
 (rHi20.type == R_LARCH_GOT_PC_HI20 &&
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
  rLo12.type == R_LARCH_GOT_PC_LO12)))
 return;
 
@@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
 symBase = rHi20.sym->getVA(ctx);
+  else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
+symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
   else {
 Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
  << rHi20.expr << ") against symbol " << rHi20.sym
@@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 return;
 
   sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
-  sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
+  if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
+  else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
+  else
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
   sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
   remove = 4;
 }
@@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, 
size_t i,
   }
 }
 
+// Relax code sequence.
+// From:
+//   lu12i.w $rd, %le_hi20_r(sym)
+//   add.w/d $rd, $rd, $tp, %le_add_r(sym)
+//   addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
+// To:
+//   addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
+static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
+   uint64_t loc, Relocation &r, uint32_t &remove) {
+  uint64_t val

[llvm-branch-commits] [lld] [lld][LoongArch] Relax TLS LE/GD/LD. (PR #123600)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-lld-elf

@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)


Changes

In local-exec form, the code sequence is converted as follows:
```
From:
  lu12i.w $rd, %le_hi20_r(sym)
R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
  add.w/d $rd, $rd, $tp, %le_add_r(sym)
R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
  addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
  addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
R_LARCH_TLS_LE_LO12_R
``` 

In global-dynamic or local-dynamic, the code sequence is converted as follows:
```
From:
  pcalau12i $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX
  addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2
``` 
Note: For initial-exec form, since it involves the conversion from IE to LE, we 
will implement it in a future patch.

---

Patch is 24.67 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123600.diff


6 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+64-4) 
- (modified) lld/test/ELF/loongarch-relax-emit-relocs.s (+107-5) 
- (added) lld/test/ELF/loongarch-relax-tls-le.s (+115) 
- (modified) lld/test/ELF/loongarch-tls-gd.s (+41-2) 
- (modified) lld/test/ELF/loongarch-tls-ld.s (+38-2) 
- (modified) lld/test/ELF/loongarch-tls-le.s (+16) 


``diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef relocs, 
size_t i) {
 
 // Relax code sequence.
 // From:
-//   pcalau12i $a0, %pc_hi20(sym)
-//   addi.w/d $a0, $a0, %pc_lo12(sym)
+//   pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
+//   addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 // To:
-//   pcaddi $a0, %pc_lo12(sym)
+//   pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 //
 // From:
 //   pcalau12i $a0, %got_pc_hi20(sym_got)
@@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
  rLo12.type == R_LARCH_PCALA_LO12) ||
 (rHi20.type == R_LARCH_GOT_PC_HI20 &&
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
+ rLo12.type == R_LARCH_GOT_PC_LO12) ||
+(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
  rLo12.type == R_LARCH_GOT_PC_LO12)))
 return;
 
@@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
 symBase = rHi20.sym->getVA(ctx);
+  else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
+symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
   else {
 Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
  << rHi20.expr << ") against symbol " << rHi20.sym
@@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
 return;
 
   sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
-  sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
+  if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
+  else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
+  else
+sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
   sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
   remove = 4;
 }
@@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, 
size_t i,
   }
 }
 
+// Relax code sequence.
+// From:
+//   lu12i.w $rd, %le_hi20_r(sym)
+//   add.w/d $rd, $rd, $tp, %le_add_r(sym)
+//   addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
+// To:
+//   addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
+static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
+   uint64_t loc, Relocation &r, uint32_t &remove) {
+  uint64_t val = r.sym->getVA(ctx, r.addend);
+  // Check if the val exceeds the range of addi/ld/st.
+  if (!isInt<12>(val))
+return;
+  uint32_t currInsn = read32le(sec.content().data() + r.offset);
+  switch (r.type) {
+  case R_LARCH_TLS_LE_HI20_R:
+  case R_LARCH_TLS_LE_ADD_R:
+sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
+remove = 4;
+break;
+  case R_LARCH_TLS_LE_LO12_R:
+currInsn =
+insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0);
+sec.relaxAux->writes.push_back(currInsn);
+sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R;
+break;
+  }
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.get

[llvm-branch-commits] [clang] [Driver] Change linker job in Baremetal toolchain object accomodate GCCInstallation.(2/3) (PR #121830)

2025-01-20 Thread Garvit Gupta via llvm-branch-commits


@@ -36,6 +36,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF {
   Tool *buildStaticLibTool() const override;
 
 public:
+  virtual bool isUsingLD() const { return UseLD || GCCInstallation.isValid(); }

quic-garvgupt wrote:

The original PR included a request to support a downstream toolchain that 
inherits from the Baremetal toolchain and defaults to LLD. I therefore made 
this virtual so that it can be overridden. However, in the recent patchset I 
have removed it, since the community has no need for it and it can easily be 
maintained downstream.

https://github.com/llvm/llvm-project/pull/121830
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [Flang] Introduce FortranSupport (PR #122069)

2025-01-20 Thread Michael Kruse via llvm-branch-commits

Meinersbur wrote:

`FortranSupport` has just been introduced in #122894.

https://github.com/llvm/llvm-project/pull/122069
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)


Changes

Instructions with relocation `R_LARCH_CALL36` may be relaxed as follows:
```
From:
   pcaddu18i $dest, %call36(foo)
 R_LARCH_CALL36, R_LARCH_RELAX
   jirl $r, $dest, 0
To:
   b/bl foo  # bl if r=$ra, b if r=$zero
 R_LARCH_B26
```

---
Full diff: https://github.com/llvm/llvm-project/pull/123576.diff


4 Files Affected:

- (modified) lld/ELF/Arch/LoongArch.cpp (+41) 
- (added) lld/test/ELF/loongarch-relax-call36-2.s (+63) 
- (added) lld/test/ELF/loongarch-relax-call36.s (+135) 
- (added) lld/test/ELF/loongarch-relax-emit-relocs-2.s (+61) 


``diff
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index b999e7fd27ae9d..0aa0cf5b657a0f 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -58,6 +58,8 @@ enum Op {
   LD_W = 0x2880,
   LD_D = 0x28c0,
   JIRL = 0x4c00,
+  B = 0x5000,
+  BL = 0x5400,
 };
 
 enum Reg {
@@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   remove = 4;
 }
 
+// Relax code sequence.
+// From:
+//   pcaddu18i $ra, %call36(foo)
+//   jirl $ra, $ra, 0
+// To:
+//   b/bl foo
+static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
+uint64_t loc, Relocation &r, uint32_t &remove) {
+  const uint64_t symLocal =
+  (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) +
+  r.addend;
+
+  const int64_t distance = symLocal - loc;
+  // Check if the distance aligns 4 bytes or exceeds the range of b[l].
+  if ((distance & 0x3) != 0 || !isInt<28>(distance))
+return;
+
+  const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4);
+  if (getD5(nextInsn) == R_RA) {
+// convert jirl to bl
+sec.relaxAux->relocTypes[i] = R_LARCH_B26;
+sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0));
+remove = 4;
+  } else if (getD5(nextInsn) == R_ZERO) {
+// convert jirl to b
+sec.relaxAux->relocTypes[i] = R_LARCH_B26;
+sec.relaxAux->writes.push_back(insn(B, 0, 0, 0));
+remove = 4;
+  }
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   const MutableArrayRef relocs = sec.relocs();
@@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   if (isPairRelaxable(relocs, i))
 relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
   break;
+case R_LARCH_CALL36:
+  if (relaxable(relocs, i))
+relaxCall36(ctx, sec, i, loc, r, remove);
+  break;
 }
 
 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const {
 // RelExpr is needed for relocating.
 r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
 break;
+  case R_LARCH_B26:
+skip = 4;
+write32le(p, aux.writes[writesIdx++]);
+break;
   default:
 llvm_unreachable("unsupported type");
   }
diff --git a/lld/test/ELF/loongarch-relax-call36-2.s 
b/lld/test/ELF/loongarch-relax-call36-2.s
new file mode 100644
index 00..71650aefe94321
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-call36-2.s
@@ -0,0 +1,63 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+
+# RUN: ld.lld --relax -T lds a.o -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s 
--check-prefixes=RELAX,RELAX-MID
+
+## Unsure whether this needs a diagnostic. GNU ld allows this.
+# RUN: ld.lld --relax -T lds -pie a.o -o a.pie
+# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s 
--check-prefixes=RELAX,RELAX-MID
+
+# RUN: ld.lld --relax -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
+# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s 
--check-prefixes=RELAX,NORELAX-MID
+
+# RELAX-LABEL:  <_start>:
+## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0
+# RELAX-NEXT:800:  pcaddu18i $ra, 512
+# RELAX-NEXT:  jirl   $ra, $ra, 0
+# RELAX-NEXT:  bl 134217720
+# RELAX-NEXT:  bl -134217728
+## offset = 12 - 0x810 = -0x804(-134217732), hi=512, lo18=-4
+# RELAX-NEXT:810:  pcaddu18i $ra, -512
+# RELAX-NEXT:  jirl   $ra, $ra, -4
+# RELAX-EMPTY:
+
+# RELAX-MID-LABEL:  <.mid>:
+## offset = 0x801 - 0x8008000 = 32768
+# RELAX-MID-NEXT:8008000:  bl 32768
+# RELAX-MID-NEXT:  b  32764
+# RELAX-MID-EMPTY:
+
+# NORELAX-MID-LABEL: <.mid>:
+# NORELAX-MID-NEXT:  8008000:  pcaddu18i $ra, 0
+# NORELAX-MID-NEXT:jirl   $ra, $ra, 0
+# NORELAX-MID-NEXT:pcaddu18i $t0, 0
+# NORELAX-MID-NEXT:jr $t0
+# NORELAX-MID-EMPTY:
+
+#--- a.s
+.global _start, ifunc
+_start:
+  call36 pos   # exceed positive range (.text+0x7fc),

[llvm-branch-commits] [lld] [lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 (PR #123576)

2025-01-20 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/123576

Instructions with relocation `R_LARCH_CALL36` may be relaxed as follows:
```
From:
   pcaddu18i $dest, %call36(foo)
 R_LARCH_CALL36, R_LARCH_RELAX
   jirl $r, $dest, 0
To:
   b/bl foo  # bl if r=$ra, b if r=$zero
 R_LARCH_B26
```

>From f1f995b5fc8e90126b5825d52b9c75cd45d27cfc Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Thu, 26 Dec 2024 11:32:33 +0800
Subject: [PATCH 1/3] Relax call36/tail36.

Instructions with relocation `R_LARCH_CALL36` may be relax as follows:
```
From:
  pcaddu18i $dest, %call36(foo)
R_LARCH_CALL36, R_LARCH_RELAX
  jirl $r, $dest, 0
To:
  b/bl foo  # bl if r=$ra, b if r=$zero
R_LARCH_B26
```
---
 lld/ELF/Arch/LoongArch.cpp | 41 ++
 1 file changed, 41 insertions(+)

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index b999e7fd27ae9d..0aa0cf5b657a0f 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -58,6 +58,8 @@ enum Op {
   LD_W = 0x2880,
   LD_D = 0x28c0,
   JIRL = 0x4c00,
+  B = 0x5000,
+  BL = 0x5400,
 };
 
 enum Reg {
@@ -830,6 +832,37 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection 
&sec, size_t i,
   remove = 4;
 }
 
+// Relax code sequence.
+// From:
+//   pcaddu18i $ra, %call36(foo)
+//   jirl $ra, $ra, 0
+// To:
+//   b/bl foo
+static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
+uint64_t loc, Relocation &r, uint32_t &remove) {
+  const uint64_t symLocal =
+  (r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) +
+  r.addend;
+
+  const int64_t distance = symLocal - loc;
+  // Check if the distance aligns 4 bytes or exceeds the range of b[l].
+  if ((distance & 0x3) != 0 || !isInt<28>(distance))
+return;
+
+  const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4);
+  if (getD5(nextInsn) == R_RA) {
+// convert jirl to bl
+sec.relaxAux->relocTypes[i] = R_LARCH_B26;
+sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0));
+remove = 4;
+  } else if (getD5(nextInsn) == R_ZERO) {
+// convert jirl to b
+sec.relaxAux->relocTypes[i] = R_LARCH_B26;
+sec.relaxAux->writes.push_back(insn(B, 0, 0, 0));
+remove = 4;
+  }
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   const MutableArrayRef relocs = sec.relocs();
@@ -874,6 +907,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
   if (isPairRelaxable(relocs, i))
 relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
   break;
+case R_LARCH_CALL36:
+  if (relaxable(relocs, i))
+relaxCall36(ctx, sec, i, loc, r, remove);
+  break;
 }
 
 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -977,6 +1014,10 @@ void LoongArch::finalizeRelax(int passes) const {
 // RelExpr is needed for relocating.
 r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
 break;
+  case R_LARCH_B26:
+skip = 4;
+write32le(p, aux.writes[writesIdx++]);
+break;
   default:
 llvm_unreachable("unsupported type");
   }

>From f227ae532236e20148a872c811721a8de4e16318 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Fri, 27 Dec 2024 14:37:40 +0800
Subject: [PATCH 2/3] modify test for call36/tail36.

---
 lld/test/ELF/loongarch-relax-call36-2.s  |  63 +
 lld/test/ELF/loongarch-relax-call36.s| 135 +++
 lld/test/ELF/loongarch-relax-emit-relocs-2.s |  61 +
 3 files changed, 259 insertions(+)
 create mode 100644 lld/test/ELF/loongarch-relax-call36-2.s
 create mode 100644 lld/test/ELF/loongarch-relax-call36.s
 create mode 100644 lld/test/ELF/loongarch-relax-emit-relocs-2.s

diff --git a/lld/test/ELF/loongarch-relax-call36-2.s 
b/lld/test/ELF/loongarch-relax-call36-2.s
new file mode 100644
index 00..1c216a9bdc35ed
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-call36-2.s
@@ -0,0 +1,63 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
+
+# RUN: ld.lld -T lds a.o -o a
+# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s 
--check-prefixes=RELAX,RELAX-MID
+
+## Unsure whether this needs a diagnostic. GNU ld allows this.
+# RUN: ld.lld -T lds -pie a.o -o a.pie
+# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s 
--check-prefixes=RELAX,RELAX-MID
+
+# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
+# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s 
--check-prefixes=RELAX,NORELAX-MID
+
+# RELAX-LABEL:  <_start>:
+## offset = 0x1000 - 0x800 = 0x800(134217728), hi=512, lo18=0
+# RELAX-NEXT:800:  pcaddu18i $ra, 512
+# RELAX-NEXT:  jirl   $ra, $ra, 0
+# RELAX-NEXT:  bl 134217720
+

[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Parse WHEN, OTHERWISE, MATCH clauses plus METADIRECTIVE (PR #121817)

2025-01-20 Thread Kiran Chandramohan via llvm-branch-commits

https://github.com/kiranchandramohan approved this pull request.

LG.

https://github.com/llvm/llvm-project/pull/121817
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits

paschalis-mpeis wrote:

Force-pushed to add the missing code. Also, this PR is now stacked on top of 
#123635.
Thanks for the comments @maks. I am not sure if your 
[concern](https://github.com/llvm/llvm-project/issues/116817#issuecomment-2602866672)
 on the issue still stands or not.

https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)

2025-01-20 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur ready_for_review 
https://github.com/llvm/llvm-project/pull/122341
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang][NFC] Move runtime library files to flang-rt. (PR #110298)

2025-01-20 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur ready_for_review 
https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] Rename libFortranRuntime.a to libflang_rt.a (PR #122341)

2025-01-20 Thread Joseph Huber via llvm-branch-commits

https://github.com/jhuber6 approved this pull request.

Straightforward renaming and more consistent with the clang runtimes.

https://github.com/llvm/llvm-project/pull/122341
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang][NFC] Move runtime library files to flang-rt. (PR #110298)

2025-01-20 Thread Michael Kruse via llvm-branch-commits

Meinersbur wrote:

> There are some missing files in in flang-rt/CUDA both headers and cpp files.

Git tries to be helpful and moves new files to the location where their 
sibling files went, but it marks both locations of the same file as 
conflicting. It is not always obvious which one is the "right" one.

https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] ea3aa97 - Avoid module name clashes by choosing unique names

2025-01-20 Thread Kiran Chandramohan via llvm-branch-commits

Author: Kiran Chandramohan
Date: 2025-01-20T16:23:54Z
New Revision: ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8

URL: 
https://github.com/llvm/llvm-project/commit/ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8
DIFF: 
https://github.com/llvm/llvm-project/commit/ea3aa97c17ce30df40f8fc8c2ebb89332c83c5b8.diff

LOG: Avoid module name clashes by choosing unique names

Added: 


Modified: 
flang/test/Lower/zero_init.f90
flang/test/Lower/zero_init_default_init.f90

Removed: 




diff  --git a/flang/test/Lower/zero_init.f90 b/flang/test/Lower/zero_init.f90
index 5ed6f2247de3b2..16b11158bfce27 100644
--- a/flang/test/Lower/zero_init.f90
+++ b/flang/test/Lower/zero_init.f90
@@ -5,16 +5,16 @@
 ! RUN: bbc -finit-global-zero -emit-hlfir -o - %s | FileCheck 
--check-prefix=CHECK-DEFAULT %s
 ! RUN: bbc -finit-global-zero=false -emit-hlfir -o - %s | FileCheck 
--check-prefix=CHECK-NO-ZERO-INIT %s
 
-module m1
+module zeroInitM1
   real :: x
-end module m1
+end module zeroInitM1
 
-!CHECK-DEFAULT: fir.global @_QMm1Ex : f32 {
+!CHECK-DEFAULT: fir.global @_QMzeroinitm1Ex : f32 {
 !CHECK-DEFAULT:   %[[UNDEF:.*]] = fir.zero_bits f32
 !CHECK-DEFAULT:   fir.has_value %[[UNDEF]] : f32
 !CHECK-DEFAULT: }
 
-!CHECK-NO-ZERO-INIT: fir.global @_QMm1Ex : f32 {
+!CHECK-NO-ZERO-INIT: fir.global @_QMzeroinitm1Ex : f32 {
 !CHECK-NO-ZERO-INIT:   %[[UNDEF:.*]] = fir.undefined f32
 !CHECK-NO-ZERO-INIT:   fir.has_value %[[UNDEF]] : f32
 !CHECK-NO-ZERO-INIT: }

diff  --git a/flang/test/Lower/zero_init_default_init.f90 
b/flang/test/Lower/zero_init_default_init.f90
index e2d1f545e35a57..8ca3b33b8ef5c1 100644
--- a/flang/test/Lower/zero_init_default_init.f90
+++ b/flang/test/Lower/zero_init_default_init.f90
@@ -7,16 +7,16 @@
 
 ! Test that the flag does not affect globals with default init
 
-module m2
+module zeroInitM2
   type val
 integer :: my_val = 1
   end type val
   type(val) :: v1
-end module m2
+end module zeroInitM2
 
-!CHECK:  fir.global @_QMm2Ev1 : !fir.type<_QMm2Tval{my_val:i32}> {
-!CHECK:%[[V1:.*]] = fir.undefined !fir.type<_QMm2Tval{my_val:i32}>
+!CHECK:  fir.global @_QMzeroinitm2Ev1 : 
!fir.type<_QMzeroinitm2Tval{my_val:i32}> {
+!CHECK:%[[V1:.*]] = fir.undefined !fir.type<_QMzeroinitm2Tval{my_val:i32}>
 !CHECK:%[[ONE:.*]] = arith.constant 1 : i32
-!CHECK:%[[V1_INIT:.*]] = fir.insert_value %[[V1]], %[[ONE]], ["my_val", 
!fir.type<_QMm2Tval{my_val:i32}>] : (!fir.type<_QMm2Tval{my_val:i32}>, i32) -> 
!fir.type<_QMm2Tval{my_val:i32}>
-!CHECK:fir.has_value %[[V1_INIT]] : !fir.type<_QMm2Tval{my_val:i32}>
+!CHECK:%[[V1_INIT:.*]] = fir.insert_value %[[V1]], %[[ONE]], ["my_val", 
!fir.type<_QMzeroinitm2Tval{my_val:i32}>] : 
(!fir.type<_QMzeroinitm2Tval{my_val:i32}>, i32) -> 
!fir.type<_QMzeroinitm2Tval{my_val:i32}>
+!CHECK:fir.has_value %[[V1_INIT]] : 
!fir.type<_QMzeroinitm2Tval{my_val:i32}>
 !CHECK:  }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Introduce overlapped record type map support (PR #119588)

2025-01-20 Thread via llvm-branch-commits

agozillon wrote:

Small ping for a review on this if anyone has a little spare time, it would be 
greatly appreciated, thank you very much ahead of time :-)

https://github.com/llvm/llvm-project/pull/119588
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [mlir] [Flang][OpenMP][MLIR] Initial declare target to for variables implementation (PR #119589)

2025-01-20 Thread via llvm-branch-commits

agozillon wrote:

Small ping for a review on this if anyone has a little spare time, it would be 
greatly appreciated, thank you very much ahead of time :-)

https://github.com/llvm/llvm-project/pull/119589
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis ready_for_review 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-bolt

Author: Paschalis Mpeis (paschalis-mpeis)


Changes

When a pending relocation is created it is also marked whether it is
optional or not. It can be optional when such relocation is added as
part of an optimization (i.e., `scanExternalRefs`).

When bolt tries to `flushPendingRelocations`, it safely skips any
optional relocations that cannot be encoded.

Background:
BOLT, as part of scanExternalRefs, identifies external references from
calls and creates some pending relocations for them. Those when flushed
will update references to point to the optimized functions. This
optimization can be disabled using `--no-scan`.

BOLT can assert if any of these pending relocations cannot be encoded.

This patch does not disable this optimization but instead selectively
applies it given that a pending relocation is optional.

---

# Stacked PR on top of:
- #123635 

---
Full diff: https://github.com/llvm/llvm-project/pull/116964.diff


6 Files Affected:

- (modified) bolt/include/bolt/Core/BinarySection.h (+8-5) 
- (modified) bolt/include/bolt/Core/Relocation.h (+4-1) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (+1-1) 
- (modified) bolt/lib/Core/BinarySection.cpp (+13-1) 
- (modified) bolt/lib/Core/Relocation.cpp (+33) 
- (modified) bolt/unittests/Core/BinaryContext.cpp (+32) 


``diff
diff --git a/bolt/include/bolt/Core/BinarySection.h 
b/bolt/include/bolt/Core/BinarySection.h
index dedee361882497..37d0932a5c142e 100644
--- a/bolt/include/bolt/Core/BinarySection.h
+++ b/bolt/include/bolt/Core/BinarySection.h
@@ -66,8 +66,10 @@ class BinarySection {
   // from the original section address.
   RelocationSetType DynamicRelocations;
 
-  // Pending relocations for this section.
-  std::vector PendingRelocations;
+  /// Pending relocations for this section and whether they are optional, i.e.,
+  /// added as part of an optimization. In that case they can be safely omitted
+  /// if flushPendingRelocations discovers they cannot be encoded.
+  std::vector> PendingRelocations;
 
   struct BinaryPatch {
 uint64_t Offset;
@@ -374,9 +376,10 @@ class BinarySection {
 DynamicRelocations.emplace(Reloc);
   }
 
-  /// Add relocation against the original contents of this section.
-  void addPendingRelocation(const Relocation &Rel) {
-PendingRelocations.push_back(Rel);
+  /// Add relocation against the original contents of this section. When added
+  /// as part of an optimization it is marked as \p Optional.
+  void addPendingRelocation(const Relocation &Rel, bool Optional = false) {
+PendingRelocations.push_back({Rel, Optional});
   }
 
   /// Add patch to the input contents of this section.
diff --git a/bolt/include/bolt/Core/Relocation.h 
b/bolt/include/bolt/Core/Relocation.h
index 933f62a31f8fd7..177cc0c70431f4 100644
--- a/bolt/include/bolt/Core/Relocation.h
+++ b/bolt/include/bolt/Core/Relocation.h
@@ -64,9 +64,12 @@ struct Relocation {
   /// and \P Type mismatch occurred.
   static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents);
 
-  // Adjust value depending on relocation type (make it PC relative or not)
+  /// Adjust value depending on relocation type (make it PC relative or not).
   static uint64_t encodeValue(uint64_t Type, uint64_t Value, uint64_t PC);
 
+  /// Return true if there are enough bits to encode the relocation value.
+  static bool canEncodeValue(uint64_t Type, uint64_t Value, uint64_t PC);
+
   /// Extract current relocated value from binary contents. This is used for
   /// RISC architectures where values are encoded in specific bits depending
   /// on the relocation value. For X86, we limit to sign extending the value
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 5da777411ba7a1..5d1e5ca92ca131 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1672,7 +1672,7 @@ bool BinaryFunction::scanExternalRefs() {
   // Add relocations unless disassembly failed for this function.
   if (!DisassemblyFailed)
 for (Relocation &Rel : FunctionRelocations)
-  getOriginSection()->addPendingRelocation(Rel);
+  getOriginSection()->addPendingRelocation(Rel, /*Optional*/ true);
 
   // Inform BinaryContext that this function symbols will not be defined and
   // relocations should not be created against them.
diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp
index 9ad49ca1b3a038..a37cc7603df285 100644
--- a/bolt/lib/Core/BinarySection.cpp
+++ b/bolt/lib/Core/BinarySection.cpp
@@ -165,11 +165,19 @@ void 
BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
 OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
   SectionFileOffset + Patch.Offset);
 
-  for (Relocation &Reloc : PendingRelocations) {
+  uint64_t SkippedPendingRelocations = 0;
+  for (auto &[Reloc, Optional] : PendingRelocations) {
 uint64_t Value = Reloc.Addend;
 if (Reloc.Symbol)
   Value += Resolver(Reloc.Symbol);
 
+// 

[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-01-20 Thread Paschalis Mpeis via llvm-branch-commits


@@ -165,11 +165,17 @@ void 
BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
 OS.pwrite(Patch.Bytes.data(), Patch.Bytes.size(),
   SectionFileOffset + Patch.Offset);
 
+  uint64_t SkippedPendingRelocations = 0;
   for (Relocation &Reloc : PendingRelocations) {
 uint64_t Value = Reloc.Addend;
 if (Reloc.Symbol)
   Value += Resolver(Reloc.Symbol);
 
+if (!Relocation::canEncodeValue(Reloc.Type, Value,

paschalis-mpeis wrote:

`addPendingRelocation` now becomes the only way to add a pending relocation 
(Parent PR #123635). If it comes from the `scanExternalRefs` optimization, then 
it is marked as optional. Finally, those relocations can be safely skipped when 
it's time to flush them.

https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits