https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/150048
Backport dd36a6901249796a175c3058a50d97cc3a7ba52c 4b99eb2eb49d8fcdb29ba494501e481cf09831ae Requested by: @jrtc27 >From 5f76f7ec608a8e7fa15b19b38a53d0272f239468 Mon Sep 17 00:00:00 2001 From: Jessica Clarke <jrt...@jrtc27.com> Date: Mon, 21 Jul 2025 20:58:16 +0100 Subject: [PATCH 1/2] [NFC][Sparc] Pre-commit a test showing inefficient and broken LD/GD TLS (cherry picked from commit dd36a6901249796a175c3058a50d97cc3a7ba52c) --- llvm/test/CodeGen/SPARC/tls-sp.ll | 111 ++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 llvm/test/CodeGen/SPARC/tls-sp.ll diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll new file mode 100644 index 0000000000000..928d1dcbe8300 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/tls-sp.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=sparc -relocation-model=pic < %s | FileCheck --check-prefix=SPARC %s +; RUN: llc -mtriple=sparc64 -relocation-model=pic < %s | FileCheck --check-prefix=SPARC64 %s + +;; TODO: Fix the code generation for these functions. + +@x = external thread_local global i8 + +;; Test that we don't over-allocate stack space when calling __tls_get_addr +;; with the call frame pseudos able to be eliminated. +define ptr @no_alloca() nounwind { +; SPARC-LABEL: no_alloca: +; SPARC: ! %bb.0: ! %entry +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .Ltmp0: +; SPARC-NEXT: call .Ltmp1 +; SPARC-NEXT: .Ltmp2: +; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC-NEXT: .Ltmp1: +; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC-NEXT: add %i0, %o7, %i0 +; SPARC-NEXT: sethi %tgd_hi22(x), %i1 +; SPARC-NEXT: add %i1, %tgd_lo10(x), %i1 +; SPARC-NEXT: add %i0, %i1, %o0, %tgd_add(x) +; SPARC-NEXT: call __tls_get_addr, %tgd_call(x) +; SPARC-NEXT: nop +; SPARC-NEXT: ret +; SPARC-NEXT: restore %g0, %o0, %o0 +; +; SPARC64-LABEL: no_alloca: +; SPARC64: ! %bb.0: ! %entry +; SPARC64-NEXT: save %sp, -144, %sp +; SPARC64-NEXT: .Ltmp0: +; SPARC64-NEXT: rd %pc, %o7 +; SPARC64-NEXT: .Ltmp2: +; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0 +; SPARC64-NEXT: .Ltmp1: +; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0 +; SPARC64-NEXT: add %i0, %o7, %i0 +; SPARC64-NEXT: sethi %tgd_hi22(x), %i1 +; SPARC64-NEXT: add %i1, %tgd_lo10(x), %i1 +; SPARC64-NEXT: add %i0, %i1, %o0, %tgd_add(x) +; SPARC64-NEXT: call __tls_get_addr, %tgd_call(x) +; SPARC64-NEXT: nop +; SPARC64-NEXT: ret +; SPARC64-NEXT: restore %g0, %o0, %o0 +entry: + %0 = call ptr @llvm.threadlocal.address.p0(ptr @x) + ret ptr %0 +} + +;; Test that %sp is valid for the call to __tls_get_addr. We store to a dynamic +;; alloca in order to prevent eliminating any call frame pseudos from the call. +define ptr @dynamic_alloca(i64 %n) nounwind { +; SPARC-LABEL: dynamic_alloca: +; SPARC: ! %bb.0: ! %entry +; SPARC-NEXT: save %sp, -96, %sp +; SPARC-NEXT: .Ltmp3: +; SPARC-NEXT: call .Ltmp4 +; SPARC-NEXT: .Ltmp5: +; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i0 +; SPARC-NEXT: .Ltmp4: +; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i0 +; SPARC-NEXT: add %i0, %o7, %i0 +; SPARC-NEXT: add %sp, -1, %sp +; SPARC-NEXT: sethi %tgd_hi22(x), %i2 +; SPARC-NEXT: add %i2, %tgd_lo10(x), %i2 +; SPARC-NEXT: add %i0, %i2, %o0, %tgd_add(x) +; SPARC-NEXT: call __tls_get_addr, %tgd_call(x) +; SPARC-NEXT: nop +; SPARC-NEXT: add %sp, 1, %sp +; SPARC-NEXT: add %i1, 7, %i0 +; SPARC-NEXT: and %i0, -8, %i0 +; SPARC-NEXT: sub %sp, %i0, %i0 +; SPARC-NEXT: add %i0, -8, %sp +; SPARC-NEXT: mov 1, %i1 +; SPARC-NEXT: stb %i1, [%i0+88] +; SPARC-NEXT: ret +; SPARC-NEXT: restore %g0, %o0, %o0 +; +; SPARC64-LABEL: dynamic_alloca: +; SPARC64: ! %bb.0: ! %entry +; SPARC64-NEXT: save %sp, -128, %sp +; SPARC64-NEXT: .Ltmp3: +; SPARC64-NEXT: rd %pc, %o7 +; SPARC64-NEXT: .Ltmp5: +; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i1 +; SPARC64-NEXT: .Ltmp4: +; SPARC64-NEXT: or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i1 +; SPARC64-NEXT: add %i1, %o7, %i1 +; SPARC64-NEXT: add %sp, -1, %sp +; SPARC64-NEXT: sethi %tgd_hi22(x), %i2 +; SPARC64-NEXT: add %i2, %tgd_lo10(x), %i2 +; SPARC64-NEXT: add %i1, %i2, %o0, %tgd_add(x) +; SPARC64-NEXT: call __tls_get_addr, %tgd_call(x) +; SPARC64-NEXT: nop +; SPARC64-NEXT: add %sp, 1, %sp +; SPARC64-NEXT: add %i0, 15, %i0 +; SPARC64-NEXT: and %i0, -16, %i0 +; SPARC64-NEXT: sub %sp, %i0, %i0 +; SPARC64-NEXT: mov %i0, %sp +; SPARC64-NEXT: mov 1, %i1 +; SPARC64-NEXT: stb %i1, [%i0+2175] +; SPARC64-NEXT: ret +; SPARC64-NEXT: restore %g0, %o0, %o0 +entry: + %0 = call ptr @llvm.threadlocal.address.p0(ptr @x) + %1 = alloca i8, i64 %n + store i8 1, ptr %1 + ret ptr %0 +} >From 87ce9d1ab4c5514eb41d42ef719a3c03b90ceb2e Mon Sep 17 00:00:00 2001 From: Jessica Clarke <jrt...@jrtc27.com> Date: Tue, 22 Jul 2025 16:44:18 +0100 Subject: [PATCH 2/2] [Sparc] Remove bogus stack adjustment for LD/GD TLS (#149890) This argument is the number of bytes to adjust the stack by for the duration of the call. In most cases, PEI is able to eliminate the corresponding call frame pseudos, folding them into the initial stack frame allocation (rounded up to stack alignment), where it just ends up allocating more space than needed. However, in the rare case where this cannot be done, e.g. due to the use of a dynamic alloca, the 1 byte stack adjustment persists and results in a misaligned stack for the duration of the call. This has been the case ever since TLS support was added in cb1dca602c43 ("[Sparc] Add support for TLS in sparc."), and I can only assume that 1 was used erroneously thinking that it is the number of arguments (as there is 1 register argument for the call), not the number of bytes for on-stack arguments. Fixes: https://github.com/llvm/llvm-project/issues/149808 (cherry picked from commit 4b99eb2eb49d8fcdb29ba494501e481cf09831ae) --- llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4 ++-- llvm/test/CodeGen/SPARC/tls-sp.ll | 8 +------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 9b434d87c2676..1aa8efe3e9979 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2201,7 +2201,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue Chain = DAG.getEntryNode(); SDValue InGlue; - Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InGlue); InGlue = Chain.getValue(1); SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT); @@ -2219,7 +2219,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, InGlue}; Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops); InGlue = Chain.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, 1, 0, InGlue, DL); + Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL); InGlue = Chain.getValue(1); SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InGlue); diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll index 928d1dcbe8300..de9af01398d23 100644 --- a/llvm/test/CodeGen/SPARC/tls-sp.ll +++ b/llvm/test/CodeGen/SPARC/tls-sp.ll @@ -2,8 +2,6 @@ ; RUN: llc -mtriple=sparc -relocation-model=pic < %s | FileCheck --check-prefix=SPARC %s ; RUN: llc -mtriple=sparc64 -relocation-model=pic < %s | FileCheck --check-prefix=SPARC64 %s -;; TODO: Fix the code generation for these functions. - @x = external thread_local global i8 ;; Test that we don't over-allocate stack space when calling __tls_get_addr @@ -29,7 +27,7 @@ define ptr @no_alloca() nounwind { ; ; SPARC64-LABEL: no_alloca: ; SPARC64: ! %bb.0: ! %entry -; SPARC64-NEXT: save %sp, -144, %sp +; SPARC64-NEXT: save %sp, -128, %sp ; SPARC64-NEXT: .Ltmp0: ; SPARC64-NEXT: rd %pc, %o7 ; SPARC64-NEXT: .Ltmp2: @@ -62,13 +60,11 @@ define ptr @dynamic_alloca(i64 %n) nounwind { ; SPARC-NEXT: .Ltmp4: ; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i0 ; SPARC-NEXT: add %i0, %o7, %i0 -; SPARC-NEXT: add %sp, -1, %sp ; SPARC-NEXT: sethi %tgd_hi22(x), %i2 ; SPARC-NEXT: add %i2, %tgd_lo10(x), %i2 ; SPARC-NEXT: add %i0, %i2, %o0, %tgd_add(x) ; SPARC-NEXT: call __tls_get_addr, %tgd_call(x) ; SPARC-NEXT: nop -; SPARC-NEXT: add %sp, 1, %sp ; SPARC-NEXT: add %i1, 7, %i0 ; SPARC-NEXT: and %i0, -8, %i0 ; SPARC-NEXT: sub %sp, %i0, %i0 @@ -88,13 +84,11 @@ define ptr @dynamic_alloca(i64 %n) nounwind { ; SPARC64-NEXT: .Ltmp4: ; SPARC64-NEXT: or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i1 ; SPARC64-NEXT: add %i1, %o7, %i1 -; SPARC64-NEXT: add %sp, -1, %sp ; SPARC64-NEXT: sethi %tgd_hi22(x), %i2 ; SPARC64-NEXT: add %i2, %tgd_lo10(x), %i2 ; SPARC64-NEXT: add %i1, %i2, %o0, %tgd_add(x) ; SPARC64-NEXT: call __tls_get_addr, %tgd_call(x) ; SPARC64-NEXT: nop -; SPARC64-NEXT: add %sp, 1, %sp ; SPARC64-NEXT: add %i0, 15, %i0 ; SPARC64-NEXT: and %i0, -16, %i0 ; SPARC64-NEXT: sub %sp, %i0, %i0 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits