https://github.com/dominik-steenken updated 
https://github.com/llvm/llvm-project/pull/169317

>From 3f2709c1080a849e9e1a679e3017750dcdf1dc66 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Wed, 16 Jul 2025 10:48:55 +0200
Subject: [PATCH 1/7] [SystemZ] Global Stackprotector and associated location
 section

This commit allows `-mstack-protector-guard=global` for `s390x`.

It also adds a new arch-specific option `-mstack-protector-guard-record`,
analogous to `-mrecord-mcount`, which will cause `clang` to emit a
`__stack_protector_loc` section containing all the locations in the output
binary that load the stack guard address, for the purposes of later rewriting
of those loads by the kernel. This new option only works together with the
`global` stack protector.

In order to minimize exposure of the stack guard, both the storing of the
stack guard onto the stack, and the later comparison of that value against
the reference value, are handled via direct mem-to-mem instructions, those
being `mvc` and `clc`.

This is achieved by introducing two new pseudo instructions, `MOVE_STACK_GUARD`
and `COMPARE_STACK_GUARD`, which are inserted by the DAGCombiner after
SelectionDAG construction. These pseudos stick around throughout the entire
backend pipeline, and are lowered only in the AsmPrinter.

This commit also adds tests for both kinds of stack protectors (tls and global),
for the proper insertion of the pseudos, the proper emission of the
`__stack_protector_loc` section, as well as the option compatibility checks
for the new options.
---
 clang/include/clang/Basic/CodeGenOptions.def  |   1 +
 clang/include/clang/Options/Options.td        |   8 +
 clang/lib/CodeGen/CodeGenFunction.cpp         |   8 +
 clang/lib/Driver/ToolChains/Clang.cpp         |  40 +++--
 .../CodeGen/SystemZ/stack-guard-pseudos.c     |  16 ++
 clang/test/Driver/stack-protector-guard.c     |  18 ++
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 127 ++++++++++++++
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.h   |   2 +
 .../Target/SystemZ/SystemZISelDAGToDAG.cpp    |   9 +-
 .../Target/SystemZ/SystemZISelLowering.cpp    | 114 ++++++++++++-
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |   2 -
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  |  59 +++----
 llvm/lib/Target/SystemZ/SystemZInstrInfo.h    |   1 -
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   |  13 ++
 .../SystemZ/stack-guard-global-nopic.ll       | 157 +++++++++++++++++
 .../CodeGen/SystemZ/stack-guard-global-pic.ll | 159 ++++++++++++++++++
 llvm/test/CodeGen/SystemZ/stack-guard-tls.ll  | 135 +++++++++++++++
 llvm/test/CodeGen/SystemZ/stack-guard.ll      |  33 ----
 18 files changed, 811 insertions(+), 91 deletions(-)
 create mode 100644 clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
 create mode 100644 llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/stack-guard-tls.ll
 delete mode 100644 llvm/test/CodeGen/SystemZ/stack-guard.ll

diff --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index a059803c433e3..9c951b62a7b2f 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -157,6 +157,7 @@ CODEGENOPT(InstrumentForProfiling , 1, 0, Benign) ///< Set 
when -pg is enabled.
 CODEGENOPT(CallFEntry , 1, 0, Benign) ///< Set when -mfentry is enabled.
 CODEGENOPT(MNopMCount , 1, 0, Benign) ///< Set when -mnop-mcount is enabled.
 CODEGENOPT(RecordMCount , 1, 0, Benign) ///< Set when -mrecord-mcount is 
enabled.
+CODEGENOPT(StackProtectorGuardRecord, 1, 0, Benign) ///< Set when 
-mstack-protector-guard-record is enabled.
 CODEGENOPT(PackedStack , 1, 0, Benign) ///< Set when -mpacked-stack is enabled.
 CODEGENOPT(LessPreciseFPMAD  , 1, 0, Benign) ///< Enable less precise MAD 
instructions to
                                              ///< be generated.
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 1a2cf1410e9ed..1a715e3b8fbff 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -5799,6 +5799,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], 
"mstack-protector-guard-reg=">
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"Use the given reg for addressing the stack-protector guard">,
   MarshallingInfoString<CodeGenOpts<"StackProtectorGuardReg">>;
+def mstackprotector_guard_record
+    : Flag<["-"], "mstack-protector-guard-record">,
+      HelpText<
+          "Generate a __stack_protector_loc section entry for each load of "
+          "the stackguard address.">,
+      Visibility<[ClangOption, CC1Option]>,
+      Group<m_Group>,
+      MarshallingInfoFlag<CodeGenOpts<"StackProtectorGuardRecord">>;
 def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at 
function entry (x86/SystemZ only)">,
   Visibility<[ClangOption, CC1Option]>, Group<m_Group>,
   MarshallingInfoFlag<CodeGenOpts<"CallFEntry">>;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp 
b/clang/lib/CodeGen/CodeGenFunction.cpp
index ac25bd95f0463..73a80bff27e21 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1195,6 +1195,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, 
QualType RetTy,
     }
   }
 
+  if (CGM.getCodeGenOpts().StackProtectorGuardRecord) {
+    if (CGM.getCodeGenOpts().StackProtectorGuard != "global")
+      CGM.getDiags().Report(diag::err_opt_not_valid_without_opt)
+          << "-mstack-protector-guard-record"
+          << "-mstack-protector-guard=global";
+    Fn->addFnAttr("mstackprotector-guard-record");
+  }
+
   if (CGM.getCodeGenOpts().PackedStack) {
     if (getContext().getTargetInfo().getTriple().getArch() !=
         llvm::Triple::systemz)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 7187d1a158e01..f3a6aa0bec399 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3438,22 +3438,24 @@ static void RenderSSPOptions(const Driver &D, const 
ToolChain &TC,
   }
 
   const std::string &TripleStr = EffectiveTriple.getTriple();
+  StringRef GuardValue;
   if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_EQ)) {
-    StringRef Value = A->getValue();
+    GuardValue = A->getValue();
     if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() &&
         !EffectiveTriple.isARM() && !EffectiveTriple.isThumb() &&
-        !EffectiveTriple.isRISCV() && !EffectiveTriple.isPPC())
+        !EffectiveTriple.isRISCV() && !EffectiveTriple.isPPC() &&
+        !EffectiveTriple.isSystemZ())
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getAsString(Args) << TripleStr;
     if ((EffectiveTriple.isX86() || EffectiveTriple.isARM() ||
-         EffectiveTriple.isThumb()) &&
-        Value != "tls" && Value != "global") {
+         EffectiveTriple.isThumb() || EffectiveTriple.isSystemZ()) &&
+        GuardValue != "tls" && GuardValue != "global") {
       D.Diag(diag::err_drv_invalid_value_with_suggestion)
-          << A->getOption().getName() << Value << "tls global";
+          << A->getOption().getName() << GuardValue << "tls global";
       return;
     }
     if ((EffectiveTriple.isARM() || EffectiveTriple.isThumb()) &&
-        Value == "tls") {
+        GuardValue == "tls") {
       if (!Args.hasArg(options::OPT_mstack_protector_guard_offset_EQ)) {
         D.Diag(diag::err_drv_ssp_missing_offset_argument)
             << A->getAsString(Args);
@@ -3477,18 +3479,19 @@ static void RenderSSPOptions(const Driver &D, const 
ToolChain &TC,
       CmdArgs.push_back("-target-feature");
       CmdArgs.push_back("+read-tp-tpidruro");
     }
-    if (EffectiveTriple.isAArch64() && Value != "sysreg" && Value != "global") 
{
+    if (EffectiveTriple.isAArch64() && GuardValue != "sysreg" &&
+        GuardValue != "global") {
       D.Diag(diag::err_drv_invalid_value_with_suggestion)
-          << A->getOption().getName() << Value << "sysreg global";
+          << A->getOption().getName() << GuardValue << "sysreg global";
       return;
     }
     if (EffectiveTriple.isRISCV() || EffectiveTriple.isPPC()) {
-      if (Value != "tls" && Value != "global") {
+      if (GuardValue != "tls" && GuardValue != "global") {
         D.Diag(diag::err_drv_invalid_value_with_suggestion)
-            << A->getOption().getName() << Value << "tls global";
+            << A->getOption().getName() << GuardValue << "tls global";
         return;
       }
-      if (Value == "tls") {
+      if (GuardValue == "tls") {
         if (!Args.hasArg(options::OPT_mstack_protector_guard_offset_EQ)) {
           D.Diag(diag::err_drv_ssp_missing_offset_argument)
               << A->getAsString(Args);
@@ -3562,6 +3565,21 @@ static void RenderSSPOptions(const Driver &D, const 
ToolChain &TC,
     }
     A->render(Args, CmdArgs);
   }
+
+  if (Arg *A = Args.getLastArg(options::OPT_mstackprotector_guard_record)) {
+    if (!EffectiveTriple.isSystemZ()) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << A->getAsString(Args) << TripleStr;
+      return;
+    }
+    if (GuardValue != "global") {
+      D.Diag(diag::err_drv_argument_only_allowed_with)
+          << "-mstack-protector-guard-record"
+          << "-mstack-protector-guard=global";
+      return;
+    }
+    A->render(Args, CmdArgs);
+  }
 }
 
 static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
diff --git a/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c 
b/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
new file mode 100644
index 0000000000000..b364aa4028ec7
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -S -mllvm -stop-after=systemz-isel -stack-protector 1 
-triple=s390x-ibm-linux < %s -o - | FileCheck -check-prefix=CHECK-PSEUDOS %s
+// RUN: not %clang_cc1 -S -stack-protector 1 -mstack-protector-guard-record 
-triple=s390x-ibm-linux < %s -o - 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s 
+// CHECK-PSEUDOS:   bb.0.entry:
+// CHECK-PSEUDOS:     %3:addr64bit = LOAD_STACK_GUARD_ADDRESS
+// CHECK-PSEUDOS:     MOVE_STACK_GUARD %stack.0.StackGuardSlot, 0, %3
+// CHECK-PSEUDOS:     COMPARE_STACK_GUARD %stack.0.StackGuardSlot, 0, %3, 
implicit-def $cc
+
+extern char *strcpy (char * D, const char * S);
+int main(int argc, char *argv[])
+{
+    char Buffer[8] = {0};
+    strcpy(Buffer, argv[1]);
+    return 0;
+}
+
+// CHECK-OPTS: error: option '-mstack-protector-guard-record' cannot be 
specified without '-mstack-protector-guard=global'
diff --git a/clang/test/Driver/stack-protector-guard.c 
b/clang/test/Driver/stack-protector-guard.c
index 666c83079e519..8c8aacfa574c7 100644
--- a/clang/test/Driver/stack-protector-guard.c
+++ b/clang/test/Driver/stack-protector-guard.c
@@ -155,3 +155,21 @@
 
 // CHECK-TLS-POWERPC32: "-cc1" {{.*}}"-mstack-protector-guard=tls" 
"-mstack-protector-guard-offset=24" "-mstack-protector-guard-reg=r2"
 // INVALID-REG-POWERPC32: error: invalid value 'r3' in 
'mstack-protector-guard-reg=', expected one of: r2
+
+// RUN: %clang -### -target systemz-unknown-elf -mstack-protector-guard=tls %s 
2>&1 | \
+// RUN:  FileCheck -check-prefix=CHECK_TLS_SYSTEMZ %s
+// CHECK_TLS_SYSTEMZ: "-cc1" {{.*}}"-mstack-protector-guard=tls"
+
+// RUN: %clang -### -target systemz-unknown-elf -mstack-protector-guard=global 
%s 2>&1 | \
+// RUN:  FileCheck -check-prefix=CHECK_GLOBAL_SYSTEMZ %s
+// CHECK_GLOBAL_SYSTEMZ: "-cc1" {{.*}}"-mstack-protector-guard=global"
+
+// RUN: %clang -### -target systemz-unknown-elf -mstack-protector-guard=global 
\
+// RUN:  -mstack-protector-guard-record %s 2>&1 | \
+// RUN:  FileCheck -check-prefix=CHECK_GLOBAL_RECORD_SYSTEMZ %s
+// CHECK_GLOBAL_RECORD_SYSTEMZ: "-cc1" {{.*}}"-mstack-protector-guard=global" 
"-mstack-protector-guard-record"
+
+// RUN: not %clang -target systemz-unknown-elf -mstack-protector-guard=tls \
+// RUN:  -mstack-protector-guard-record %s 2>&1 | \
+// RUN:  FileCheck -check-prefix=INVALID_TLS_RECORD_SYSTEMZ %s
+// INVALID_TLS_RECORD_SYSTEMZ: error: invalid argument 
'-mstack-protector-guard-record' only allowed with 
'-mstack-protector-guard=global'
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp 
b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index f061272d3fad4..0c16ba9cda4ff 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -24,9 +24,11 @@
 #include "llvm/BinaryFormat/GOFF.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
@@ -213,6 +215,16 @@ SystemZAsmPrinter::AssociatedDataAreaTable::insert(const 
MachineOperand MO) {
   return insert(Sym, ADAslotType);
 }
 
+/// Return the displacement to add to the register produced by the stack guard
+/// address load in order to address the guard value itself.
+///
+/// For the "global" guard the register already holds the guard's address, so
+/// the displacement is 0. For every other guard type (the TLS/default case)
+/// the register holds the thread pointer and the guard lives 40 bytes past it.
+static unsigned long getStackGuardOffset(const MachineBasicBlock *MBB) {
+  const Module *M = MBB->getParent()->getFunction().getParent();
+  const bool IsGlobalGuard = M->getStackProtectorGuard() == "global";
+  return IsGlobalGuard ? 0 : 40;
+}
+
 void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
   SystemZ_MC::verifyInstructionPredicates(MI->getOpcode(),
                                           getSubtargetInfo().getFeatureBits());
@@ -740,6 +752,42 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
   case SystemZ::EH_SjLj_Setup:
     return;
 
+  case SystemZ::LOAD_STACK_GUARD: {
+    // If requested, record address of stack guard address load
+    if (MF->getFunction().hasFnAttribute("mstackprotector-guard-record"))
+      emitStackProtectorLocEntry();
+    Register AddrReg = emitLoadStackGuardAddress(MI);
+    LoweredMI = MCInstBuilder(SystemZ::LG)
+                    .addReg(AddrReg)
+                    .addImm(getStackGuardOffset(MI->getParent()))
+                    .addReg(0);
+  } break;
+
+  case SystemZ::LOAD_STACK_GUARD_ADDRESS:
+    // If requested, record address of stack guard address load
+    if (MF->getFunction().hasFnAttribute("mstackprotector-guard-record"))
+      emitStackProtectorLocEntry();
+    emitLoadStackGuardAddress(MI);
+    return;
+
+  case SystemZ::COMPARE_STACK_GUARD:
+    LoweredMI = MCInstBuilder(SystemZ::CLC)
+                    .addReg(MI->getOperand(0).getReg())
+                    .addImm(MI->getOperand(1).getImm())
+                    .addImm(8)
+                    .addReg(MI->getOperand(2).getReg())
+                    .addImm(getStackGuardOffset(MI->getParent()));
+    break;
+
+  case SystemZ::MOVE_STACK_GUARD:
+    LoweredMI = MCInstBuilder(SystemZ::MVC)
+                    .addReg(MI->getOperand(0).getReg())
+                    .addImm(MI->getOperand(1).getImm())
+                    .addImm(8)
+                    .addReg(MI->getOperand(2).getReg())
+                    .addImm(getStackGuardOffset(MI->getParent()));
+    break;
+
   default:
     Lower.lower(MI, LoweredMI);
     break;
@@ -747,6 +795,85 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
   EmitToStreamer(*OutStreamer, LoweredMI);
 }
 
+// Record the current code position in the `__stack_protector_loc` section.
+// A pointer-sized reference to a fresh temporary label is written into that
+// section, after which the label itself is placed at the current position of
+// the section that was active on entry.
+void SystemZAsmPrinter::emitStackProtectorLocEntry() {
+  MCSymbol *LocLabel = OutContext.createTempSymbol();
+  auto *LocSection = OutContext.getELFSection(
+      "__stack_protector_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+  const unsigned PtrSize = getDataLayout().getPointerSize();
+
+  OutStreamer->pushSection();
+  OutStreamer->switchSection(LocSection);
+  OutStreamer->emitSymbolValue(LocLabel, PtrSize);
+  OutStreamer->popSection();
+
+  // Back in the original section: the label marks whatever is emitted next.
+  OutStreamer->emitLabel(LocLabel);
+}
+
+// Emit the instruction sequence that loads the stack guard address (or, for
+// the TLS guard, the thread pointer the guard is addressed from) into the
+// register defined by operand 0 of MI. The sequence depends on the module's
+// stack protector guard type:
+//   - "tls" or unset: ear/sllg/ear, assembling the 64-bit value from the
+//     access registers %a0 and %a1.
+//   - "global": larl of __stack_chk_guard for non-PIC modules, otherwise an
+//     lgrl through the symbol's GOT entry.
+// Any other guard type is reported as a fatal error.
+// Return the register the stack guard address was loaded into.
+Register SystemZAsmPrinter::emitLoadStackGuardAddress(const MachineInstr *MI) {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const Module *M = MF.getFunction().getParent();
+  const StringRef GuardType = M->getStackProtectorGuard();
+  const Register AddrReg = MI->getOperand(0).getReg();
+
+  if (GuardType.empty() || GuardType == "tls") {
+    const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
+    const Register Reg32 = MRI.getSubReg(AddrReg, SystemZ::subreg_l32);
+
+    // EAR can only load the low subregister so use a shift for %a0 to produce
+    // the GR containing %a0 and %a1.
+
+    // ear <reg>, %a0
+    EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::EAR)
+                                     .addReg(Reg32)
+                                     .addReg(SystemZ::A0)
+                                     .addReg(AddrReg));
+
+    // sllg <reg>, <reg>, 32
+    EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::SLLG)
+                                     .addReg(AddrReg)
+                                     .addReg(AddrReg)
+                                     .addReg(0)
+                                     .addImm(32));
+
+    // ear <reg>, %a1
+    EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::EAR)
+                                     .addReg(Reg32)
+                                     .addReg(SystemZ::A1)
+                                     .addReg(AddrReg));
+  } else if (GuardType == "global") {
+    // Look the guard variable up by name. Module::getGlobalVariable has no
+    // overload taking a type; a pointer passed as its second argument is
+    // converted to `AllowInternal = true`. getNamedGlobal spells that lookup
+    // out explicitly.
+    const GlobalVariable *GV = M->getNamedGlobal("__stack_chk_guard");
+    assert(GV &&
+           "global stack guard requested but __stack_chk_guard is missing");
+    MCSymbol *Sym = TM.getSymbol(GV);
+
+    // Emit the address load.
+    MCInst Load;
+    if (M->getPICLevel() == PICLevel::NotPIC) {
+      Load = MCInstBuilder(SystemZ::LARL)
+                 .addReg(AddrReg)
+                 .addExpr(MCSymbolRefExpr::create(Sym, OutContext));
+    } else {
+      const MCExpr *GOTRef =
+          MCSymbolRefExpr::create(Sym, SystemZ::S_GOT, OutContext);
+      Load = MCInstBuilder(SystemZ::LGRL)
+                 .addReg(AddrReg)
+                 .addExpr(GOTRef)
+                 .addExpr(getGlobalOffsetTable(OutContext));
+    }
+    EmitToStreamer(*OutStreamer, Load);
+  } else {
+    // The guard type comes from the module, so an unknown value is bad input
+    // rather than an internal invariant violation: diagnose it in all build
+    // modes instead of using llvm_unreachable.
+    report_fatal_error(Twine("Unknown stack protector type \"") + GuardType +
+                       "\"");
+  }
+  return AddrReg;
+}
+
 // Emit the largest nop instruction smaller than or equal to NumBytes
 // bytes.  Return the size of nop emitted.
 static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h 
b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index cb101e472824f..22e82a691be64 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -139,6 +139,8 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public 
AsmPrinter {
   void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                      SystemZMCInstLower &Lower);
   void LowerPATCHABLE_RET(const MachineInstr &MI, SystemZMCInstLower &Lower);
+  Register emitLoadStackGuardAddress(const MachineInstr *MI);
+  void emitStackProtectorLocEntry();
   void emitAttributes(Module &M);
 };
 } // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp 
b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index a05fdc74e6366..fa1daa8bf8c54 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -10,10 +10,11 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include "SystemZTargetMachine.h"
 #include "SystemZISelLowering.h"
+#include "SystemZTargetMachine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
@@ -369,7 +370,11 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
       if (F.hasFnAttribute("mrecord-mcount"))
         report_fatal_error("mrecord-mcount only supported with fentry-call");
     }
-
+    // Reject the guard-record request unless the global stack protector is in
+    // use. The attribute key queried here must be the one the frontend
+    // attaches and the AsmPrinter reads ("mstackprotector-guard-record").
+    if (F.getParent()->getStackProtectorGuard() != "global") {
+      if (F.hasFnAttribute("mstackprotector-guard-record"))
+        report_fatal_error("mstack-protector-guard-record only supported with "
+                           "mstack-protector-guard=global");
+    }
     Subtarget = &MF.getSubtarget<SystemZSubtarget>();
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2511d08a6d0ef..99dcb3906f4a7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -14,12 +14,13 @@
 #include "SystemZCallingConv.h"
 #include "SystemZConstantPoolValue.h"
 #include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
 #include "SystemZTargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -8054,6 +8055,25 @@ SDValue SystemZTargetLowering::combineSTORE(
                                SN->getMemOperand());
     }
   }
+
+  // combine STORE (LOAD_STACK_GUARD) into MOVE_STACK_GUARD
+  if (Op1->isMachineOpcode() &&
+      (Op1->getMachineOpcode() == SystemZ::LOAD_STACK_GUARD)) {
+    // If so, create a MOVE_STACK_GUARD node to replace the store,
+    // and a LOAD_STACK_GUARD_ADDRESS to replace the LOAD_STACK_GUARD
+    MachineSDNode *LoadAddr = DAG.getMachineNode(
+        SystemZ::LOAD_STACK_GUARD_ADDRESS, SDLoc(SN), MVT::i64);
+    int FI = cast<FrameIndexSDNode>(SN->getOperand(2))->getIndex();
+    // FrameIndex, Dummy Displacement
+    SDValue Ops[] = {DAG.getTargetFrameIndex(FI, MVT::i64),
+                     DAG.getTargetConstant(0, SDLoc(SN), MVT::i64),
+                     SDValue(LoadAddr, 0), SN->getChain()};
+    MachineSDNode *Move = DAG.getMachineNode(SystemZ::MOVE_STACK_GUARD,
+                                             SDLoc(SN), MVT::Other, Ops);
+
+    return SDValue(Move, 0);
+  }
+
   // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
   if (!SN->isTruncatingStore() &&
       Op1.getOpcode() == ISD::BSWAP &&
@@ -8873,25 +8893,103 @@ 
SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
   return {-1, -1, -1};
 }
 
+/// Return true if the CC operand (operand 4) of the branch node \p N is a
+/// single-use ICMP that compares a LOAD_STACK_GUARD machine node against a
+/// normal load whose address operand is a frame index, in either operand
+/// order.
+///
+/// On success the out-parameters are filled in:
+///   \p FI              frame index read by the stack-side load
+///   \p InChain         chain input of that load
+///   \p OutChain        chain result of that load
+///   \p StackGuardLoad  the LOAD_STACK_GUARD operand of the compare
+/// and both the frame-index load and the compare are added to the combiner
+/// worklist. When false is returned, none of the out-parameters is modified.
+static bool isStackGuardCheck(const SDNode *N, int &FI, SDValue &InChain,
+                              SDValue &OutChain, SDValue &StackGuardLoad,
+                              SystemZTargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Comp = N->getOperand(4);
+  if (Comp->getOpcode() != SystemZISD::ICMP || !Comp->hasOneUse())
+    return false;
+
+  const auto IsGuardLoad = [](SDValue V) {
+    return V.isMachineOpcode() &&
+           V.getMachineOpcode() == SystemZ::LOAD_STACK_GUARD;
+  };
+  // isa<> rather than dyn_cast<>: only the node kind is of interest here.
+  const auto IsFrameSlotLoad = [](SDValue V) {
+    return ISD::isNormalLoad(V.getNode()) &&
+           isa<FrameIndexSDNode>(V.getOperand(1));
+  };
+
+  // Accept the guard load on either side of the compare.
+  SDValue Op0 = Comp->getOperand(0);
+  SDValue Op1 = Comp->getOperand(1);
+  SDValue Guard, SlotLoad;
+  if (IsGuardLoad(Op0) && IsFrameSlotLoad(Op1)) {
+    Guard = Op0;
+    SlotLoad = Op1;
+  } else if (IsGuardLoad(Op1) && IsFrameSlotLoad(Op0)) {
+    Guard = Op1;
+    SlotLoad = Op0;
+  } else {
+    return false;
+  }
+  auto *FILoad = cast<LoadSDNode>(SlotLoad);
+
+  // Both loaded values are expected to feed the compare only. This is
+  // asserted rather than handled.
+  // TODO: decide whether to bail out (return false) instead.
+  assert(
+      SDValue(FILoad, 0).hasOneUse() &&
+      "Value of stackguard loaded from stack must be used for compare only!");
+  assert(Guard.hasOneUse() &&
+         "Value of reference stackguard must be used for compare only!");
+
+  StackGuardLoad = Guard;
+  FI = cast<FrameIndexSDNode>(FILoad->getOperand(1))->getIndex();
+  InChain = FILoad->getChain();
+  OutChain = SDValue(FILoad, 1);
+  DCI.AddToWorklist(FILoad);
+  DCI.AddToWorklist(Comp.getNode());
+  return true;
+}
+
 SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
 
-  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
   auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
   auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
   if (!CCValid || !CCMask)
     return SDValue();
-
   int CCValidVal = CCValid->getZExtValue();
   int CCMaskVal = CCMask->getZExtValue();
   SDValue Chain = N->getOperand(0);
   SDValue CCReg = N->getOperand(4);
+  SDLoc DL(N);
+
+  // Combine BR_CCMASK (ICMP (Load FI, Load StackGuard)) into BRC
+  // (COMPARE_STACK_GUARD)
+  int FI = 0;
+  SDValue InChain, OutChain, StackGuardLoad;
+  if (isStackGuardCheck(N, FI, InChain, OutChain, StackGuardLoad, DCI)) {
+    // Sanity Checks
+    assert(CCMaskVal == SystemZ::CCMASK_CMP_NE &&
+           "Unexpected branch condition in stack guard check");
+    // Handle the load's chain if necessary
+    DAG.ReplaceAllUsesOfValueWith(OutChain, InChain);
+
+    // Construct the LOAD_STACK_GUARD_ADDRESS node to replace LOAD_STACK_GUARD
+    auto *LoadAddress =
+        DAG.getMachineNode(SystemZ::LOAD_STACK_GUARD_ADDRESS, DL, MVT::i64);
+
+    // Construct the COMPARE_STACK_GUARD node
+    SDVTList CmpVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+    auto CompOps = {DAG.getTargetFrameIndex(FI, MVT::i64),
+                    DAG.getTargetConstant(0, DL, MVT::i64),
+                    SDValue(LoadAddress, 0), InChain};
+    auto *Compare =
+        DAG.getMachineNode(SystemZ::COMPARE_STACK_GUARD, DL, CmpVTs, CompOps);
+    // Construct the BRC node using COMPARE_STACK_GUARD's CC result
+    auto BranchOps = {DAG.getTargetConstant(CCValidVal, DL, MVT::i32),
+                      DAG.getTargetConstant(CCMaskVal, DL, MVT::i32),
+                      N->getOperand(3), SDValue(Compare, 0),
+                      SDValue(Compare, 1)};
+    return SDValue(DAG.getMachineNode(SystemZ::BRC, DL, MVT::Other, BranchOps),
+                   0);
+  }
+
+  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
   if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
-    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
-                       Chain,
-                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
-                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
+    return DAG.getNode(SystemZISD::BR_CCMASK, DL, N->getValueType(0), Chain,
+                       DAG.getTargetConstant(CCValidVal, DL, MVT::i32),
+                       DAG.getTargetConstant(CCMaskVal, DL, MVT::i32),
                        N->getOperand(3), CCReg);
   return SDValue();
 }
@@ -9298,6 +9396,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode 
*N,
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
   case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
   case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
+  // case SystemZISD::ICMP:
+  //   return combineICMP(N, DCI);
   case ISD::SRL:
   case ISD::SRA:                return combineShiftToMulAddHigh(N, DCI);
   case ISD::MUL:                return combineMUL(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 13a1cd1614a53..60a08fef01df2 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -220,8 +220,6 @@ class SystemZTargetLowering : public TargetLowering {
 
   /// Override to support customized stack guard loading.
   bool useLoadStackGuardNode(const Module &M) const override { return true; }
-  void insertSSPDeclarations(Module &M) const override {
-  }
 
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index db4f9a15d6497..662ec5353cc93 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/BranchProbability.h"
@@ -228,35 +229,6 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, 
unsigned LowOpcode,
   MI.eraseFromParent();
 }
 
-void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineFunction &MF = *MBB->getParent();
-  const Register Reg64 = MI->getOperand(0).getReg();
-  const Register Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32);
-
-  // EAR can only load the low subregister so us a shift for %a0 to produce
-  // the GR containing %a0 and %a1.
-
-  // ear <reg>, %a0
-  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
-    .addReg(SystemZ::A0)
-    .addReg(Reg64, RegState::ImplicitDefine);
-
-  // sllg <reg>, <reg>, 32
-  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64)
-    .addReg(Reg64)
-    .addReg(0)
-    .addImm(32);
-
-  // ear <reg>, %a1
-  BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32)
-    .addReg(SystemZ::A1);
-
-  // lg <reg>, 40(<reg>)
-  MI->setDesc(get(SystemZ::LG));
-  MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0);
-}
-
 // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
 // DestReg before MBBI in MBB.  Use LowLowOpcode when both DestReg and SrcReg
 // are low registers, otherwise use RISB[LH]G.  Size is the number of bits
@@ -1056,8 +1028,7 @@ void 
SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 // and no index.  Flag is SimpleBDXLoad for loads and SimpleBDXStore for 
stores.
 static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
   const MCInstrDesc &MCID = MI->getDesc();
-  return ((MCID.TSFlags & Flag) &&
-          isUInt<12>(MI->getOperand(2).getImm()) &&
+  return ((MCID.TSFlags & Flag) && isUInt<12>(MI->getOperand(2).getImm()) &&
           MI->getOperand(3).getReg() == 0);
 }
 
@@ -1805,10 +1776,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr 
&MI) const {
     splitAdjDynAlloc(MI);
     return true;
 
-  case TargetOpcode::LOAD_STACK_GUARD:
-    expandLoadStackGuard(&MI);
-    return true;
-
   default:
     return false;
   }
@@ -1830,6 +1797,28 @@ unsigned SystemZInstrInfo::getInstSizeInBytes(const 
MachineInstr &MI) const {
     return 18;
   if (MI.getOpcode() == TargetOpcode::PATCHABLE_RET)
     return 18 + (MI.getOperand(0).getImm() == SystemZ::CondReturn ? 4 : 0);
+  if ((MI.getOpcode() == SystemZ::MOVE_STACK_GUARD) ||
+      (MI.getOpcode() == SystemZ::COMPARE_STACK_GUARD))
+    return 6;
+  if ((MI.getOpcode() == SystemZ::LOAD_STACK_GUARD_ADDRESS) ||
+      (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD)) {
+    StringRef GuardType = MI.getParent()
+                              ->getParent()
+                              ->getFunction()
+                              .getParent()
+                              ->getStackProtectorGuard();
+    unsigned Size = (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
+                        ? 6
+                        : 0; // lg to load value
+    if (GuardType == "global")
+      return Size + 6; // larl/lgrl
+    if (GuardType.empty() || GuardType == "tls")
+      return Size + 14; // ear,sllg,ear
+    llvm_unreachable(
+        (Twine("Unknown stack protector type \"") + GuardType + "\"")
+            .str()
+            .c_str());
+  }
 
   return MI.getDesc().getSize();
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 9fadf7bfb6d2b..3e138cd1dec0c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -193,7 +193,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                        unsigned HighOpcode) const;
   void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
-  void expandLoadStackGuard(MachineInstr *MI) const;
 
   MachineInstrBuilder
   emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 4f75e0132610e..57f399045e4d2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -521,6 +521,19 @@ let SimpleBDXStore = 1, mayStore = 1 in {
                        [(store GR128:$src, bdxaddr20only128:$dst)]>;
   }
 }
+
+let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
+  // LOAD_STACK_GUARD_ADDRESS may not Load, because it has no (official)
+  // operands.
+  def LOAD_STACK_GUARD_ADDRESS : Pseudo<(outs ADDR64:$grdaddr), (ins), []>;
+  let mayLoad = 1 in {
+    let mayStore = 1 in def MOVE_STACK_GUARD
+        : Pseudo<(outs), (ins bdaddr12only:$grdloc, ADDR64:$grdaddr), []>;
+    let Defs = [CC] in def COMPARE_STACK_GUARD
+        : Pseudo<(outs), (ins bdaddr12only:$grdloc, ADDR64:$grdaddr), []>;
+  }
+}
+
 def STRL  : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
 def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
 
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll b/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
new file mode 100644
index 0000000000000..d1d98537c1df2
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
+
+define i32 @test_global_stack_guard() #0 {
+; CHECK-LABEL: test_global_stack_guard:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp0
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    mvc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp1
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 1304(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+define i32 @test_global_stack_guard_branch(i32 %in) #0 {
+; CHECK-LABEL: test_global_stack_guard_branch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp2
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    mvc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    lr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    cije %r13, 1, .LBB1_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cijlh %r13, 0, .LBB1_6
+; CHECK-NEXT:  # %bb.2: # %foo
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp3
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.3: # %foo
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_4: # %bar
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp4
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.5: # %bar
+; CHECK-NEXT:    lhi %r2, 1
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_6: # %else
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp5
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.7: # %else
+; CHECK-NEXT:    lhi %r2, 2
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_8: # %bar
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  switch i32 %in, label %else [
+    i32 0, label %foo
+    i32 1, label %bar
+  ]
+foo:
+  ret i32 0
+bar:
+  ret i32 1
+else:
+  ret i32 2
+}
+
+define i32 @test_global_stack_guard_large() #0 {
+; CHECK-LABEL: test_global_stack_guard_large:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -8376
+; CHECK-NEXT:    .cfi_def_cfa_offset 8536
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp6
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp6:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    lay %r2, 8192(%r15)
+; CHECK-NEXT:    mvc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp7
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp7:
+; CHECK-NEXT:    larl %r1, __stack_chk_guard
+; CHECK-NEXT:    lay %r2, 8192(%r15)
+; CHECK-NEXT:    clc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    jlh .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 8488(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [2048 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+
+declare void @foo3(ptr)
+
+attributes #0 = { sspstrong "mstackprotector-guard-record" }
+
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"stack-protector-guard", !"global"}
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll b/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
new file mode 100644
index 0000000000000..fe8b6a7e4214d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
+
+define i32 @test_global_stack_guard() #0 {
+; CHECK-LABEL: test_global_stack_guard:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp0
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    mvc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp1
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 1304(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+define i32 @test_global_stack_guard_branch(i32 %in) #0 {
+; CHECK-LABEL: test_global_stack_guard_branch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp2
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    mvc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    lr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    cije %r13, 1, .LBB1_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cijlh %r13, 0, .LBB1_6
+; CHECK-NEXT:  # %bb.2: # %foo
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp3
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.3: # %foo
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_4: # %bar
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp4
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.5: # %bar
+; CHECK-NEXT:    lhi %r2, 1
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_6: # %else
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp5
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    clc 1184(8,%r15), 0(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.7: # %else
+; CHECK-NEXT:    lhi %r2, 2
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_8: # %bar
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  switch i32 %in, label %else [
+    i32 0, label %foo
+    i32 1, label %bar
+  ]
+foo:
+  ret i32 0
+bar:
+  ret i32 1
+else:
+  ret i32 2
+}
+
+
+define i32 @test_global_stack_guard_large() #0 {
+; CHECK-LABEL: test_global_stack_guard_large:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -8376
+; CHECK-NEXT:    .cfi_def_cfa_offset 8536
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp6
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp6:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    lay %r2, 8192(%r15)
+; CHECK-NEXT:    mvc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    la %r2, 176(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
+; CHECK-NEXT:    .quad .Ltmp7
+; CHECK-NEXT:    .text
+; CHECK-NEXT:  .Ltmp7:
+; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
+; CHECK-NEXT:    lay %r2, 8192(%r15)
+; CHECK-NEXT:    clc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    jlh .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 8488(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [2048 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+declare void @foo3(ptr)
+
+attributes #0 = { sspstrong "mstackprotector-guard-record" }
+
+
+!llvm.module.flags = !{!0, !1, !2}
+!0 = !{i32 1, !"stack-protector-guard", !"global"}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard-tls.ll b/llvm/test/CodeGen/SystemZ/stack-guard-tls.ll
new file mode 100644
index 0000000000000..ea5ad0d5429cb
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stack-guard-tls.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
+
+define i32 @test_tls_stack_guard() #0 {
+; CHECK-LABEL: test_tls_stack_guard:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    mvc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    clc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    jlh .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 1304(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+
+define i32 @test_global_stack_guard_branch(i32 %in) #0 {
+; CHECK-LABEL: test_global_stack_guard_branch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    mvc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    lr %r13, %r2
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    cije %r13, 1, .LBB1_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cijlh %r13, 0, .LBB1_6
+; CHECK-NEXT:  # %bb.2: # %foo
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    clc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.3: # %foo
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_4: # %bar
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    clc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.5: # %bar
+; CHECK-NEXT:    lhi %r2, 1
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_6: # %else
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    clc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    jlh .LBB1_8
+; CHECK-NEXT:  # %bb.7: # %else
+; CHECK-NEXT:    lhi %r2, 2
+; CHECK-NEXT:    lmg %r13, %r15, 1296(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB1_8: # %bar
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  switch i32 %in, label %else [
+    i32 0, label %foo
+    i32 1, label %bar
+  ]
+foo:
+  ret i32 0
+bar:
+  ret i32 1
+else:
+  ret i32 2
+}
+
+define i32 @test_tls_stack_guard_large() #0 {
+; CHECK-LABEL: test_tls_stack_guard_large:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -1192
+; CHECK-NEXT:    .cfi_def_cfa_offset 1352
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    mvc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    la %r2, 160(%r15)
+; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    ear %r1, %a0
+; CHECK-NEXT:    sllg %r1, %r1, 32
+; CHECK-NEXT:    ear %r1, %a1
+; CHECK-NEXT:    clc 1184(8,%r15), 40(%r1)
+; CHECK-NEXT:    jlh .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lmg %r14, %r15, 1304(%r15)
+; CHECK-NEXT:    br %r14
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    brasl %r14, __stack_chk_fail@PLT
+entry:
+  %a1 = alloca [256 x i32], align 4
+  call void @foo3(ptr %a1)
+  ret i32 0
+}
+
+declare void @foo3(ptr)
+
+attributes #0 = { sspstrong }
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard.ll b/llvm/test/CodeGen/SystemZ/stack-guard.ll
deleted file mode 100644
index 04a87b4632dd2..0000000000000
--- a/llvm/test/CodeGen/SystemZ/stack-guard.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-
-; CHECK-LABEL: @test_stack_guard
-; CHECK: ear [[REG1:%r[1-9][0-9]?]], %a0
-; CHECK: sllg [[REG1]], [[REG1]], 32
-; CHECK: ear [[REG1]], %a1
-; CHECK: lg [[REG1]], 40([[REG1]])
-; CHECK: stg [[REG1]], {{[0-9]*}}(%r15)
-; CHECK: brasl %r14, foo3@PLT
-; CHECK: ear [[REG2:%r[1-9][0-9]?]], %a0
-; CHECK: sllg [[REG2]], [[REG2]], 32
-; CHECK: ear [[REG2]], %a1
-; CHECK: lg [[REG2]], 40([[REG2]])
-; CHECK: cg [[REG2]], {{[0-9]*}}(%r15)
-
-define i32 @test_stack_guard() #0 {
-entry:
-  %a1 = alloca [256 x i32], align 4
-  call void @llvm.lifetime.start.p0(i64 1024, ptr %a1)
-  call void @foo3(ptr %a1)
-  call void @llvm.lifetime.end.p0(i64 1024, ptr %a1)
-  ret i32 0
-}
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-
-declare void @foo3(ptr)
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { sspstrong }

>From aa96a46ad5a2a1bfb8446f69764028dfdfac7179 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Fri, 5 Dec 2025 09:57:56 +0100
Subject: [PATCH 2/7] clarify comment

---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 662ec5353cc93..9f8c917beb43b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1808,8 +1808,8 @@ unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
                               .getParent()
                               ->getStackProtectorGuard();
     unsigned Size = (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
-                        ? 6
-                        : 0; // lg to load value
+                        ? 6 // lg to load value
+                        : 0;
     if (GuardType == "global")
       return Size + 6; // larl/lgrl
     if (GuardType.empty() || GuardType == "tls")

>From 43cef3093353872ac611b2ebaa5ec71490a25f08 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Fri, 5 Dec 2025 15:28:55 +0100
Subject: [PATCH 3/7] remove superfluous include

---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 99dcb3906f4a7..6f402ab8ba7e7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,7 +20,6 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/IntrinsicInst.h"

>From d5c7c3581ff545070b5153a1073d779cc03fb630 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Fri, 5 Dec 2025 16:22:57 +0100
Subject: [PATCH 4/7] add isReMaterializable to LOAD_STACK_GUARD_ADDRESS

---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 57f399045e4d2..60ad72be5e2d6 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -525,7 +525,8 @@ let SimpleBDXStore = 1, mayStore = 1 in {
 let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
   // LOAD_STACK_GUARD_ADDRESS may not Load, because it has no (official)
   // operands.
-  def LOAD_STACK_GUARD_ADDRESS : Pseudo<(outs ADDR64:$grdaddr), (ins), []>;
+  let isReMaterializable = 1 in
+    def LOAD_STACK_GUARD_ADDRESS : Pseudo<(outs ADDR64:$grdaddr), (ins), []>;
   let mayLoad = 1 in {
     let mayStore = 1 in def MOVE_STACK_GUARD
         : Pseudo<(outs), (ins bdaddr12only:$grdloc, ADDR64:$grdaddr), []>;

>From 365fa3c4a2518692a5530fd361ea24d44f708478 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Thu, 11 Dec 2025 09:35:53 +0100
Subject: [PATCH 5/7] Double up the StackGuard Pseudos to introduce dead defreg

This adds two pseudos with a _DAG suffix that can be slotted into the
Selection DAG in places where the instruction they replace does not define a
register. In the custom inserter, these are then replaced by the "real"
pseudos, which do define an early-clobber register. Regalloc assigns that
register a physical register, which ExpandPostRAPseudos can then use to store
the stack guard's address without fear that the register might end up spilled.
---
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 110 ++-------------
 .../Target/SystemZ/SystemZISelLowering.cpp    |  59 ++++++--
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |   5 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  | 128 +++++++++++++++---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.h    |   7 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   |  23 ++--
 6 files changed, 189 insertions(+), 143 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 0c16ba9cda4ff..8171d4746eaa7 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -752,41 +752,21 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case SystemZ::EH_SjLj_Setup:
     return;
 
-  case SystemZ::LOAD_STACK_GUARD: {
-    // If requested, record address of stack guard address load
-    if (MF->getFunction().hasFnAttribute("mstackprotector-guard-record"))
-      emitStackProtectorLocEntry();
-    Register AddrReg = emitLoadStackGuardAddress(MI);
-    LoweredMI = MCInstBuilder(SystemZ::LG)
-                    .addReg(AddrReg)
-                    .addImm(getStackGuardOffset(MI->getParent()))
-                    .addReg(0);
-  } break;
-
-  case SystemZ::LOAD_STACK_GUARD_ADDRESS:
-    // If requested, record address of stack guard address load
-    if (MF->getFunction().hasFnAttribute("mstackprotector-guard-record"))
-      emitStackProtectorLocEntry();
-    emitLoadStackGuardAddress(MI);
-    return;
+  case SystemZ::LOAD_STACK_GUARD:
+    llvm_unreachable("LOAD_STACK_GUARD should have been eliminated by the DAG 
Combiner.");
 
+  case SystemZ::MOVE_STACK_GUARD:
   case SystemZ::COMPARE_STACK_GUARD:
-    LoweredMI = MCInstBuilder(SystemZ::CLC)
-                    .addReg(MI->getOperand(0).getReg())
-                    .addImm(MI->getOperand(1).getImm())
-                    .addImm(8)
-                    .addReg(MI->getOperand(2).getReg())
-                    .addImm(getStackGuardOffset(MI->getParent()));
-    break;
+    llvm_unreachable("MOVE_STACK_GUARD and COMPARE_STACK_GUARD should have 
been expanded by ExpandPostRAPseudo.");
 
-  case SystemZ::MOVE_STACK_GUARD:
-    LoweredMI = MCInstBuilder(SystemZ::MVC)
-                    .addReg(MI->getOperand(0).getReg())
-                    .addImm(MI->getOperand(1).getImm())
-                    .addImm(8)
-                    .addReg(MI->getOperand(2).getReg())
-                    .addImm(getStackGuardOffset(MI->getParent()));
+  case SystemZ::LARL:
+  case SystemZ::LGRL: {
+    auto & Op = MI->getOperand(1);
+    if (Op.isGlobal() && (Op.getGlobal()->getName() == "__stack_chk_guard"))
+      emitStackProtectorLocEntry();
+    Lower.lower(MI, LoweredMI);
     break;
+  }
 
   default:
     Lower.lower(MI, LoweredMI);
@@ -805,74 +785,6 @@ void SystemZAsmPrinter::emitStackProtectorLocEntry() {
   OutStreamer->emitLabel(Sym);
 }
 
-// Emit the stack guard address load, depending on guard type.
-// Return the register the stack guard address was loaded into.
-Register SystemZAsmPrinter::emitLoadStackGuardAddress(const MachineInstr *MI) {
-  const MachineBasicBlock *MBB = MI->getParent();
-  const MachineFunction &MF = *MBB->getParent();
-  const Register AddrReg = MI->getOperand(0).getReg();
-  const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
-  const Register Reg32 = MRI.getSubReg(AddrReg, SystemZ::subreg_l32);
-
-  const Module *M = MF.getFunction().getParent();
-  StringRef GuardType = M->getStackProtectorGuard();
-
-  if (GuardType.empty() || (GuardType == "tls")) {
-    // EAR can only load the low subregister so use a shift for %a0 to produce
-    // the GR containing %a0 and %a1.
-
-    // ear <reg>, %a0
-    MCInst EAR1 = MCInstBuilder(SystemZ::EAR)
-                      .addReg(Reg32)
-                      .addReg(SystemZ::A0)
-                      .addReg(AddrReg);
-
-    // sllg <reg>, <reg>, 32
-    MCInst SLLG = MCInstBuilder(SystemZ::SLLG)
-                      .addReg(AddrReg)
-                      .addReg(AddrReg)
-                      .addReg(0)
-                      .addImm(32);
-
-    // ear <reg>, %a1
-    MCInst EAR2 = MCInstBuilder(SystemZ::EAR)
-                      .addReg(Reg32)
-                      .addReg(SystemZ::A1)
-                      .addReg(AddrReg);
-
-    EmitToStreamer(*OutStreamer, EAR1);
-    EmitToStreamer(*OutStreamer, SLLG);
-    EmitToStreamer(*OutStreamer, EAR2);
-  } else if (GuardType == "global") {
-    // Obtain the global value.
-    const auto *GV = M->getGlobalVariable(
-        "__stack_chk_guard", PointerType::getUnqual(M->getContext()));
-    assert(GV &&
-           "could not create reference to global variable __stack_chk_guard");
-    auto *Sym = TM.getSymbol(GV);
-    // Ref->
-    // Emit the address load.
-    MCInst Load;
-    if (M->getPICLevel() == PICLevel::NotPIC) {
-      Load = MCInstBuilder(SystemZ::LARL)
-                 .addReg(AddrReg)
-                 .addExpr(MCSymbolRefExpr::create(Sym, OutContext));
-    } else {
-      Load =
-          MCInstBuilder(SystemZ::LGRL)
-              .addReg(AddrReg)
-              .addExpr(MCSymbolRefExpr::create(Sym, SystemZ::S_GOT, 
OutContext))
-              .addExpr(getGlobalOffsetTable(OutContext));
-    }
-    EmitToStreamer(*OutStreamer, Load);
-  } else {
-    llvm_unreachable(
-        (Twine("Unknown stack protector type \"") + GuardType + "\"")
-            .str()
-            .c_str());
-  }
-  return AddrReg;
-}
 
 // Emit the largest nop instruction smaller than or equal to NumBytes
 // bytes.  Return the size of nop emitted.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6f402ab8ba7e7..898e32029264c 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZISelLowering.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "SystemZCallingConv.h"
 #include "SystemZConstantPoolValue.h"
 #include "SystemZMachineFunctionInfo.h"
@@ -8058,16 +8059,14 @@ SDValue SystemZTargetLowering::combineSTORE(
   // combine STORE (LOAD_STACK_GUARD) into MOVE_STACK_GUARD
   if (Op1->isMachineOpcode() &&
       (Op1->getMachineOpcode() == SystemZ::LOAD_STACK_GUARD)) {
-    // If so, create a MOVE_STACK_GUARD node to replace the store,
-    // and a LOAD_STACK_GUARD_ADDRESS to replace the LOAD_STACK_GUARD
-    MachineSDNode *LoadAddr = DAG.getMachineNode(
-        SystemZ::LOAD_STACK_GUARD_ADDRESS, SDLoc(SN), MVT::i64);
+    // If so, create a MOVE_STACK_GUARD_DAG node to replace the store,
+    // as well as the LOAD_STACK_GUARD.
     int FI = cast<FrameIndexSDNode>(SN->getOperand(2))->getIndex();
     // FrameIndex, Dummy Displacement
     SDValue Ops[] = {DAG.getTargetFrameIndex(FI, MVT::i64),
                      DAG.getTargetConstant(0, SDLoc(SN), MVT::i64),
-                     SDValue(LoadAddr, 0), SN->getChain()};
-    MachineSDNode *Move = DAG.getMachineNode(SystemZ::MOVE_STACK_GUARD,
+                     SN->getChain()};
+    MachineSDNode *Move = DAG.getMachineNode(SystemZ::MOVE_STACK_GUARD_DAG,
                                              SDLoc(SN), MVT::Other, Ops);
 
     return SDValue(Move, 0);
@@ -8964,17 +8963,13 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
     // Handle the load's chain if necessary
     DAG.ReplaceAllUsesOfValueWith(OutChain, InChain);
 
-    // Construct the LOAD_STACK_GUARD_ADDRESS node to replace LOAD_STACK_GUARD
-    auto *LoadAddress =
-        DAG.getMachineNode(SystemZ::LOAD_STACK_GUARD_ADDRESS, DL, MVT::i64);
-
-    // Construct the COMPARE_STACK_GUARD node
+    // Construct the COMPARE_STACK_GUARD_DAG to replace the icmp and
+    // LOAD_STACK_GUARD nodes.
     SDVTList CmpVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     auto CompOps = {DAG.getTargetFrameIndex(FI, MVT::i64),
-                    DAG.getTargetConstant(0, DL, MVT::i64),
-                    SDValue(LoadAddress, 0), InChain};
-    auto *Compare =
-        DAG.getMachineNode(SystemZ::COMPARE_STACK_GUARD, DL, CmpVTs, CompOps);
+                    DAG.getTargetConstant(0, DL, MVT::i64), InChain};
+    auto *Compare = DAG.getMachineNode(SystemZ::COMPARE_STACK_GUARD_DAG, DL,
+                                       CmpVTs, CompOps);
     // Construct the BRC node using COMPARE_STACK_GUARD's CC result
     auto BranchOps = {DAG.getTargetConstant(CCValidVal, DL, MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, DL, MVT::i32),
@@ -11065,6 +11060,34 @@ getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
                      DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
 }
 
+MachineBasicBlock *
+SystemZTargetLowering::emitMSGPseudo(MachineInstr &MI,
+                                     MachineBasicBlock *MBB) const {
+  MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  Register AddrReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::MOVE_STACK_GUARD), AddrReg)
+      .addFrameIndex(MI.getOperand(0).getIndex())
+      .addImm(MI.getOperand(1).getImm());
+  MI.eraseFromParent();
+  return MBB;
+}
+
+MachineBasicBlock *
+SystemZTargetLowering::emitCSGPseudo(MachineInstr &MI,
+                                     MachineBasicBlock *MBB) const {
+  MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  Register AddrReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::COMPARE_STACK_GUARD), AddrReg)
+      .addFrameIndex(MI.getOperand(0).getIndex())
+      .addImm(MI.getOperand(1).getImm());
+  MI.eraseFromParent();
+  return MBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
@@ -11222,6 +11245,12 @@ MachineBasicBlock 
*SystemZTargetLowering::EmitInstrWithCustomInserter(
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, MBB);
 
+  case SystemZ::MOVE_STACK_GUARD_DAG:
+    return emitMSGPseudo(MI, MBB);
+
+  case SystemZ::COMPARE_STACK_GUARD_DAG:
+    return emitCSGPseudo(MI, MBB);
+
   default:
     llvm_unreachable("Unexpected instr type to insert");
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 60a08fef01df2..87ad3904e7470 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -463,7 +463,10 @@ class SystemZTargetLowering : public TargetLowering {
                                          unsigned Opcode) const;
   MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;
-
+  MachineBasicBlock *emitMSGPseudo(MachineInstr &MI,
+                                   MachineBasicBlock *MBB) const;
+  MachineBasicBlock *emitCSGPseudo(MachineInstr &MI,
+                                   MachineBasicBlock *MBB) const;
   SDValue getBackchainAddress(SDValue SP, SelectionDAG &DAG) const;
 
   MachineMemOperand::Flags
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 9f8c917beb43b..bf05c1a2bea5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1776,11 +1776,104 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr 
&MI) const {
     splitAdjDynAlloc(MI);
     return true;
 
+  case SystemZ::MOVE_STACK_GUARD:
+    expandMSGPseudo(MI);
+    return true;
+
+  case SystemZ::COMPARE_STACK_GUARD:
+    expandCSGPseudo(MI);
+    return true;
+
   default:
     return false;
   }
 }
 
+namespace {
+unsigned long getStackGuardOffset(const MachineBasicBlock &MBB) {
+  // In the TLS (default) case, AddrReg will contain the thread pointer, so we
+  // need to add 40 bytes to get the actual address of the stack guard.
+  StringRef GuardType =
+      MBB.getParent()->getFunction().getParent()->getStackProtectorGuard();
+  return (GuardType == "global") ? 0 : 40;
+}
+} // namespace
+
+// Emit the stack guard address load, depending on guard type.
+// Return the register the stack guard address was loaded into.
+void SystemZInstrInfo::emitLoadStackGuardAddress(MachineInstr &MI) const {
+  MachineBasicBlock &MBB = *(MI.getParent());
+  const MachineFunction &MF = *(MBB.getParent());
+  const Register AddrReg = MI.getOperand(0).getReg();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const Register Reg32 =
+      MRI.getTargetRegisterInfo()->getSubReg(AddrReg, SystemZ::subreg_l32);
+  const auto DL = MI.getDebugLoc();
+
+  const Module *M = MF.getFunction().getParent();
+  StringRef GuardType = M->getStackProtectorGuard();
+
+  if (GuardType.empty() || (GuardType == "tls")) {
+    // EAR can only load the low subregister so use a shift for %a0 to produce
+    // the GR containing %a0 and %a1.
+
+    // ear <reg>, %a0
+    BuildMI(MBB, MI, DL, get(SystemZ::EAR), Reg32)
+        .addReg(SystemZ::A0);
+
+    // sllg <reg>, <reg>, 32
+    BuildMI(MBB, MI, DL, get(SystemZ::SLLG), AddrReg)
+        .addReg(AddrReg)
+        .addReg(0)
+        .addImm(32);
+
+    // ear <reg>, %a1
+    BuildMI(MBB, MI, DL, get(SystemZ::EAR), Reg32)
+        .addReg(SystemZ::A1);
+
+  } else if (GuardType == "global") {
+    // Obtain the global value.
+    const auto *GV = M->getNamedGlobal("__stack_chk_guard");
+    assert(GV &&
+           "could not create reference to global variable __stack_chk_guard");
+    // Ref->
+    // Emit the address load.
+    if (M->getPICLevel() == PICLevel::NotPIC) {
+      BuildMI(MBB, MI, DL, get(SystemZ::LARL), AddrReg).addGlobalAddress(GV);
+    } else {
+      BuildMI(MBB, MI, DL, get(SystemZ::LGRL), AddrReg)
+          .addGlobalAddress(GV, 0, SystemZII::MO_GOT);
+    }
+
+  } else {
+    llvm_unreachable(
+        (Twine("Unknown stack protector type \"") + GuardType + "\"")
+            .str()
+            .c_str());
+  }
+}
+
+void SystemZInstrInfo::expandMSGPseudo(MachineInstr &MI) const {
+  emitLoadStackGuardAddress(MI);
+  BuildMI(*(MI.getParent()), MI, MI.getDebugLoc(), get(SystemZ::MVC))
+      .addReg(MI.getOperand(1).getReg())
+      .addImm(MI.getOperand(2).getImm())
+      .addImm(8)
+      .addReg(MI.getOperand(0).getReg())
+      .addImm(getStackGuardOffset(*(MI.getParent())));
+  MI.removeFromParent();
+}
+void SystemZInstrInfo::expandCSGPseudo(MachineInstr &MI) const {
+  emitLoadStackGuardAddress(MI);
+  BuildMI(*(MI.getParent()), MI, MI.getDebugLoc(), get(SystemZ::CLC))
+      .addReg(MI.getOperand(1).getReg())
+      .addImm(MI.getOperand(2).getImm())
+      .addImm(8)
+      .addReg(MI.getOperand(0).getReg())
+      .addImm(getStackGuardOffset(*(MI.getParent())));
+  MI.removeFromParent();
+}
+
 unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   if (MI.isInlineAsm()) {
     const MachineFunction *MF = MI.getParent()->getParent();
@@ -1798,27 +1891,24 @@ unsigned SystemZInstrInfo::getInstSizeInBytes(const 
MachineInstr &MI) const {
   if (MI.getOpcode() == TargetOpcode::PATCHABLE_RET)
     return 18 + (MI.getOperand(0).getImm() == SystemZ::CondReturn ? 4 : 0);
   if ((MI.getOpcode() == SystemZ::MOVE_STACK_GUARD) ||
-      (MI.getOpcode() == SystemZ::COMPARE_STACK_GUARD))
-    return 6;
-  if ((MI.getOpcode() == SystemZ::LOAD_STACK_GUARD_ADDRESS) ||
-      (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD)) {
-    StringRef GuardType = MI.getParent()
-                              ->getParent()
-                              ->getFunction()
-                              .getParent()
-                              ->getStackProtectorGuard();
-    unsigned Size = (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
-                        ? 6 // lg to load value
-                        : 0;
-    if (GuardType == "global")
-      return Size + 6; // larl/lgrl
-    if (GuardType.empty() || GuardType == "tls")
-      return Size + 14; // ear,sllg,ear
-    llvm_unreachable(
-        (Twine("Unknown stack protector type \"") + GuardType + "\"")
+      (MI.getOpcode() == SystemZ::COMPARE_STACK_GUARD)) {
+      StringRef GuardType = MI.getParent()
+      ->getParent()
+      ->getFunction()
+      .getParent()
+      ->getStackProtectorGuard();
+      unsigned Size = 6;  // mvc,clc
+      if (GuardType == "global")
+        Size += 6; // larl/lgrl
+      else if (GuardType.empty() || GuardType == "tls")
+        Size += 14; // ear,sllg,ear
+      else
+        llvm_unreachable(
+          (Twine("Unknown stack protector type \"") + GuardType + "\"")
             .str()
             .c_str());
-  }
+      return Size;
+    }
 
   return MI.getDesc().getSize();
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 3e138cd1dec0c..3e088c478a298 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -193,7 +193,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                        unsigned HighOpcode) const;
   void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
-
+  void expandMSGPseudo(MachineInstr &MI) const;
+  void expandCSGPseudo(MachineInstr &MI) const;
   MachineInstrBuilder
   emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
@@ -218,6 +219,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                                        unsigned CommuteOpIdx1,
                                        unsigned CommuteOpIdx2) const override;
 
+  // Emits a load of the stack guard's address, using the DestReg
+  // of the given MI as the target.
+  void emitLoadStackGuardAddress(MachineInstr &MI) const;
+
 public:
   explicit SystemZInstrInfo(const SystemZSubtarget &STI);
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 60ad72be5e2d6..3c90af95626f2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -523,15 +523,22 @@ let SimpleBDXStore = 1, mayStore = 1 in {
 }
 
 let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
-  // LOAD_STACK_GUARD_ADDRESS may not Load, because it has no (official)
-  // operands.
-  let isReMaterializable = 1 in
-    def LOAD_STACK_GUARD_ADDRESS : Pseudo<(outs ADDR64:$grdaddr), (ins), []>;
   let mayLoad = 1 in {
-    let mayStore = 1 in def MOVE_STACK_GUARD
-        : Pseudo<(outs), (ins bdaddr12only:$grdloc, ADDR64:$grdaddr), []>;
-    let Defs = [CC] in def COMPARE_STACK_GUARD
-        : Pseudo<(outs), (ins bdaddr12only:$grdloc, ADDR64:$grdaddr), []>;
+    let mayStore = 1 in {
+      // load the stack guard's address, and move the stack guard to the stack.
+      let usesCustomInserter = 1 in def MOVE_STACK_GUARD_DAG
+          : Pseudo<(outs), (ins bdaddr12only:$grdloc), []>;
+      let Constraints = "@earlyclobber $grdaddr" in def MOVE_STACK_GUARD
+          : Pseudo<(outs ADDR64:$grdaddr), (ins bdaddr12only:$grdloc), []>;
+    }
+    let Defs = [CC] in {
+      // load the stack guard's address, and compare the stack guard against
+      // the one on the stack.
+      let usesCustomInserter = 1 in def COMPARE_STACK_GUARD_DAG
+          : Pseudo<(outs), (ins bdaddr12only:$grdloc), []>;
+      let Constraints = "@earlyclobber $grdaddr" in def COMPARE_STACK_GUARD
+          : Pseudo<(outs ADDR64:$grdaddr), (ins bdaddr12only:$grdloc), []>;
+    }
   }
 }
 

>From aa71e3564aec90e863375dc9c7920e0fc6b7c4f4 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Fri, 19 Dec 2025 10:51:19 +0100
Subject: [PATCH 6/7] Introduce RegScavenging Fallback

RegScavenger has a bug (https://github.com/llvm/llvm-project/issues/172511)
that makes it so that the early-clobber flag on the def reg associated with
`MOVE_STACK_GUARD` and `COMPARE_STACK_GUARD` gets ignored when frame index
elimination has to insert additional VRegs post RA in order to facilitate
materializing a workable base address from which to reach the stack guard
on a particularly large stack frame.
That bug leads to the early-clobber def reg, which is intended to be used
as a scratch register to hold the address of the stack guard, being assigned
to the same physical register as the one that is used to materialize the
stack base address.
In this workaround, that case is detected and RegScavenger is called again
to scavenge a different register for this use. This is not 100% guaranteed
to work, as functions that are extremely register-constrained and have large
stack frames may not have a free register or an emergency spill slot
available, but this is all we can do as a workaround until the underlying
RegScavenger issue is fixed.
---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 59 ++++++++++++++++----
 llvm/lib/Target/SystemZ/SystemZInstrInfo.h   |  6 +-
 2 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index bf05c1a2bea5c..acc9a68f1e265 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -1790,21 +1791,55 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr 
&MI) const {
 }
 
 namespace {
-unsigned long getStackGuardOffset(const MachineBasicBlock &MBB) {
+    Register scavengeAddrReg(MachineInstr& MI, MachineBasicBlock* MBB) {
+    // Create a fresh RegScavenger instance.
+    RegScavenger RS;
+    // initialize RegScavenger to correct location
+    RS.enterBasicBlockEnd(*MBB);
+    RS.backward(MI);
+
+    // Attempt to find a free register.
+    Register Scratch = RS.FindUnusedReg(&SystemZ::ADDR64BitRegClass);
+    // If not found, scavenge one, i.e. evict something to a stack spill slot.
+    if (!Scratch) {
+      Scratch = RS.scavengeRegisterBackwards(
+          SystemZ::ADDR64BitRegClass,
+          MI,              // Scavenge back to this position.
+          true,  // Will need Scratch Reg after MI.
+          0,            
+          true     // Spills are allowed.
+      );
+    }
+    return Scratch;
+  }
+unsigned long getStackGuardOffset(const MachineBasicBlock *MBB) {
   // In the TLS (default) case, AddrReg will contain the thread pointer, so we
   // need to add 40 bytes to get the actual address of the stack guard.
   StringRef GuardType =
-      MBB.getParent()->getFunction().getParent()->getStackProtectorGuard();
+      MBB->getParent()->getFunction().getParent()->getStackProtectorGuard();
   return (GuardType == "global") ? 0 : 40;
 }
+// Check MI (which should be either MOVE_STACK_GUARD or COMPARE_STACK_GUARD)
+// to see if the early-clobber flag on the def reg was honored. If so,
+// return that register. If not, scavenge a new register and return that.
+// This is a workaround for https://github.com/llvm/llvm-project/issues/172511
+// and should be removed once that issue is resolved.
+Register chooseAddrReg(MachineInstr& MI, MachineBasicBlock *MBB) {
+  Register DefReg = MI.getOperand(0).getReg();
+  Register OpReg = MI.getOperand(1).getReg();
+  // if we can use DefReg, return it
+  if (DefReg != OpReg)
+    return DefReg;
+  // otherwise, scavenge
+  return scavengeAddrReg(MI, MBB);
+}
 } // namespace
 
 // Emit the stack guard address load, depending on guard type.
 // Return the register the stack guard address was loaded into.
-void SystemZInstrInfo::emitLoadStackGuardAddress(MachineInstr &MI) const {
+void SystemZInstrInfo::emitLoadStackGuardAddress(MachineInstr &MI, Register 
AddrReg) const {
   MachineBasicBlock &MBB = *(MI.getParent());
   const MachineFunction &MF = *(MBB.getParent());
-  const Register AddrReg = MI.getOperand(0).getReg();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   const Register Reg32 =
       MRI.getTargetRegisterInfo()->getSubReg(AddrReg, SystemZ::subreg_l32);
@@ -1854,23 +1889,27 @@ void 
SystemZInstrInfo::emitLoadStackGuardAddress(MachineInstr &MI) const {
 }
 
 void SystemZInstrInfo::expandMSGPseudo(MachineInstr &MI) const {
-  emitLoadStackGuardAddress(MI);
+  MachineBasicBlock* MBB = MI.getParent();
+  Register AddrReg = chooseAddrReg(MI, MBB);
+  emitLoadStackGuardAddress(MI, AddrReg);
   BuildMI(*(MI.getParent()), MI, MI.getDebugLoc(), get(SystemZ::MVC))
       .addReg(MI.getOperand(1).getReg())
       .addImm(MI.getOperand(2).getImm())
       .addImm(8)
-      .addReg(MI.getOperand(0).getReg())
-      .addImm(getStackGuardOffset(*(MI.getParent())));
+      .addReg(AddrReg)
+      .addImm(getStackGuardOffset(MBB));
   MI.removeFromParent();
 }
 void SystemZInstrInfo::expandCSGPseudo(MachineInstr &MI) const {
-  emitLoadStackGuardAddress(MI);
+  MachineBasicBlock* MBB = MI.getParent();
+  Register AddrReg = chooseAddrReg(MI, MBB);
+  emitLoadStackGuardAddress(MI, AddrReg);
   BuildMI(*(MI.getParent()), MI, MI.getDebugLoc(), get(SystemZ::CLC))
       .addReg(MI.getOperand(1).getReg())
       .addImm(MI.getOperand(2).getImm())
       .addImm(8)
-      .addReg(MI.getOperand(0).getReg())
-      .addImm(getStackGuardOffset(*(MI.getParent())));
+      .addReg(AddrReg)
+      .addImm(getStackGuardOffset(MBB));
   MI.removeFromParent();
 }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h 
b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 3e088c478a298..1209d3adec88b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -219,9 +219,9 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                                        unsigned CommuteOpIdx1,
                                        unsigned CommuteOpIdx2) const override;
 
-  // Emits a load of the stack guard's address, using the DestReg
-  // of the given MI as the target.
-  void emitLoadStackGuardAddress(MachineInstr &MI) const;
+  // Emits a load of the stack guard's address, using the given
+  // AddrReg as the target.
+  void emitLoadStackGuardAddress(MachineInstr &MI, Register AddrReg) const;
 
 public:
   explicit SystemZInstrInfo(const SystemZSubtarget &STI);

>From 8e0e4f52c5a8d10f412afeed7136d6b86ba4509b Mon Sep 17 00:00:00 2001
From: Dominik Steenken <[email protected]>
Date: Mon, 15 Dec 2025 17:53:52 +0100
Subject: [PATCH 7/7] Update tests to align with register choices and pseudo
 names

---
 clang/test/CodeGen/SystemZ/stack-guard-pseudos.c  | 15 +++++++++------
 .../CodeGen/SystemZ/stack-guard-global-nopic.ll   | 12 ++++++------
 .../CodeGen/SystemZ/stack-guard-global-pic.ll     | 12 ++++++------
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c 
b/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
index b364aa4028ec7..ad7b8b177e106 100644
--- a/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
+++ b/clang/test/CodeGen/SystemZ/stack-guard-pseudos.c
@@ -1,10 +1,13 @@
-// RUN: %clang_cc1 -S -mllvm -stop-after=systemz-isel -stack-protector 1 
-triple=s390x-ibm-linux < %s -o - | FileCheck -check-prefix=CHECK-PSEUDOS %s
+// RUN: %clang_cc1 -S -mllvm -stop-after=systemz-isel -stack-protector 1 
-triple=s390x-ibm-linux < %s -o - | FileCheck -check-prefix=CHECK-DAGCOMBINE %s
+// RUN: %clang_cc1 -S -mllvm -stop-after=finalize-isel -stack-protector 1 
-triple=s390x-ibm-linux < %s -o - | FileCheck -check-prefix=CHECK-CUSTOMINSERT 
%s
 // RUN: not %clang_cc1 -S -stack-protector 1 -mstack-protector-guard-record 
-triple=s390x-ibm-linux < %s -o - 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s 
-// CHECK-PSEUDOS:   bb.0.entry:
-// CHECK-PSEUDOS:     %3:addr64bit = LOAD_STACK_GUARD_ADDRESS
-// CHECK-PSEUDOS:     MOVE_STACK_GUARD %stack.0.StackGuardSlot, 0, %3
-// CHECK-PSEUDOS:     COMPARE_STACK_GUARD %stack.0.StackGuardSlot, 0, %3, 
implicit-def $cc
-
+// CHECK-DAGCOMBINE:   bb.0.entry:
+// CHECK-DAGCOMBINE:     MOVE_STACK_GUARD_DAG %stack.0.StackGuardSlot, 0
+// CHECK-DAGCOMBINE:     COMPARE_STACK_GUARD_DAG %stack.0.StackGuardSlot, 0, 
implicit-def $cc
+// CHECK-CUSTOMINSERT: bb.0.entry
+// CHECK-CUSTOMINSERT:   early-clobber %10:addr64bit = MOVE_STACK_GUARD 
%stack.0.StackGuardSlot, 0
+// CHECK-CUSTOMINSERT: bb.3.entry
+// CHECK-CUSTOMINSERT: early-clobber %14:addr64bit = COMPARE_STACK_GUARD 
%stack.0.StackGuardSlot, 0, implicit-def $cc
 extern char *strcpy (char * D, const char * S);
 int main(int argc, char *argv[])
 {
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll 
b/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
index d1d98537c1df2..2ce8c12826e09 100644
--- a/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
+++ b/llvm/test/CodeGen/SystemZ/stack-guard-global-nopic.ll
@@ -118,22 +118,22 @@ define i32 @test_global_stack_guard_large() #0 {
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -8376
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8536
+; CHECK-NEXT:    lay %r1, 8192(%r15)
 ; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
 ; CHECK-NEXT:    .quad .Ltmp6
 ; CHECK-NEXT:    .text
 ; CHECK-NEXT:  .Ltmp6:
-; CHECK-NEXT:    larl %r1, __stack_chk_guard
-; CHECK-NEXT:    lay %r2, 8192(%r15)
-; CHECK-NEXT:    mvc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    larl %r2, __stack_chk_guard
+; CHECK-NEXT:    mvc 176(8,%r1), 0(%r2)
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    lay %r1, 8192(%r15)
 ; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
 ; CHECK-NEXT:    .quad .Ltmp7
 ; CHECK-NEXT:    .text
 ; CHECK-NEXT:  .Ltmp7:
-; CHECK-NEXT:    larl %r1, __stack_chk_guard
-; CHECK-NEXT:    lay %r2, 8192(%r15)
-; CHECK-NEXT:    clc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    larl %r2, __stack_chk_guard
+; CHECK-NEXT:    clc 176(8,%r1), 0(%r2)
 ; CHECK-NEXT:    jlh .LBB2_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    lhi %r2, 0
diff --git a/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll 
b/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
index fe8b6a7e4214d..2ea7e4bc22202 100644
--- a/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
+++ b/llvm/test/CodeGen/SystemZ/stack-guard-global-pic.ll
@@ -119,22 +119,22 @@ define i32 @test_global_stack_guard_large() #0 {
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -8376
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8536
+; CHECK-NEXT:    lay %r1, 8192(%r15)
 ; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
 ; CHECK-NEXT:    .quad .Ltmp6
 ; CHECK-NEXT:    .text
 ; CHECK-NEXT:  .Ltmp6:
-; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
-; CHECK-NEXT:    lay %r2, 8192(%r15)
-; CHECK-NEXT:    mvc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    lgrl %r2, __stack_chk_guard@GOT
+; CHECK-NEXT:    mvc 176(8,%r1), 0(%r2)
 ; CHECK-NEXT:    la %r2, 176(%r15)
 ; CHECK-NEXT:    brasl %r14, foo3@PLT
+; CHECK-NEXT:    lay %r1, 8192(%r15)
 ; CHECK-NEXT:    .section __stack_protector_loc,"a",@progbits
 ; CHECK-NEXT:    .quad .Ltmp7
 ; CHECK-NEXT:    .text
 ; CHECK-NEXT:  .Ltmp7:
-; CHECK-NEXT:    lgrl %r1, __stack_chk_guard@GOT
-; CHECK-NEXT:    lay %r2, 8192(%r15)
-; CHECK-NEXT:    clc 176(8,%r2), 0(%r1)
+; CHECK-NEXT:    lgrl %r2, __stack_chk_guard@GOT
+; CHECK-NEXT:    clc 176(8,%r1), 0(%r2)
 ; CHECK-NEXT:    jlh .LBB2_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    lhi %r2, 0

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to