Author: Raphael Moreira Zinsly Date: 2024-12-10T16:48:26Z New Revision: 708a478d6739aea20a8834cea45490f05b07ca10
URL: https://github.com/llvm/llvm-project/commit/708a478d6739aea20a8834cea45490f05b07ca10 DIFF: https://github.com/llvm/llvm-project/commit/708a478d6739aea20a8834cea45490f05b07ca10.diff LOG: [RISCV] Add stack clash protection (#117612) Enable `-fstack-clash-protection` for RISCV and stack probing in function prologues. We probe the stack by creating a loop that allocates and probes the stack in ProbeSize chunks. We emit an unrolled probe loop for small allocations and emit a variable-length probe loop for bigger ones. Added: llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll llvm/test/CodeGen/RISCV/stack-clash-prologue.ll Modified: clang/lib/Driver/ToolChains/Clang.cpp llvm/lib/Target/RISCV/RISCVFrameLowering.cpp llvm/lib/Target/RISCV/RISCVFrameLowering.h llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/lib/Target/RISCV/RISCVInstrInfo.td llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h llvm/lib/Target/RISCV/RISCVTargetMachine.cpp Removed: ################################################################################ diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7ab85ad9d100c2..d3206c3e8e25ed 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3777,7 +3777,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args, return; if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() && - !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64()) + !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() && + !EffectiveTriple.isRISCV()) return; Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection, diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index deb0b627225c64..655de0b4e7eb5d 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp 
@@ -580,25 +580,124 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI, Comment.str()); } +// Allocate stack space and probe it if necessary. void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineFunction &MF, StackOffset Offset, - uint64_t RealStackSize, - bool EmitCFI) const { + MachineFunction &MF, uint64_t Offset, + uint64_t RealStackSize, bool EmitCFI, + bool NeedProbe, + uint64_t ProbeSize) const { DebugLoc DL; const RISCVRegisterInfo *RI = STI.getRegisterInfo(); const RISCVInstrInfo *TII = STI.getInstrInfo(); - RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup, + // Simply allocate the stack if it's not big enough to require a probe. + if (!NeedProbe || Offset <= ProbeSize) { + RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset), + MachineInstr::FrameSetup, getStackAlign()); + + if (EmitCFI) { + // Emit ".cfi_def_cfa_offset RealStackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + + return; + } + + // Unroll the probe loop depending on the number of iterations. + if (Offset < ProbeSize * 5) { + uint64_t CurrentOffset = 0; + bool IsRV64 = STI.is64Bit(); + while (CurrentOffset + ProbeSize <= Offset) { + RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, + StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup, + getStackAlign()); + // s[d|w] zero, 0(sp) + BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? 
RISCV::SD : RISCV::SW)) + .addReg(RISCV::X0) + .addReg(SPReg) + .addImm(0) + .setMIFlags(MachineInstr::FrameSetup); + + CurrentOffset += ProbeSize; + if (EmitCFI) { + // Emit ".cfi_def_cfa_offset CurrentOffset" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + } + + uint64_t Residual = Offset - CurrentOffset; + if (Residual) { + RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, + StackOffset::getFixed(-Residual), MachineInstr::FrameSetup, + getStackAlign()); + if (EmitCFI) { + // Emit ".cfi_def_cfa_offset Offset" + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + } + + return; + } + + // Emit a variable-length allocation probing loop. + uint64_t RoundedSize = alignDown(Offset, ProbeSize); + uint64_t Residual = Offset - RoundedSize; + + Register TargetReg = RISCV::X6; + // SUB TargetReg, SP, RoundedSize + RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg, + StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup, getStackAlign()); if (EmitCFI) { - // Emit ".cfi_def_cfa_offset RealStackSize" - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize)); + // Set the CFA register to TargetReg. + unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true); + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); + .setMIFlags(MachineInstr::FrameSetup); + } + + // It will be expanded to a probe loop in `inlineStackProbe`. 
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC)) + .addReg(SPReg) + .addReg(TargetReg); + + if (EmitCFI) { + // Set the CFA register back to SP. + unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true); + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + + if (Residual) + RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual), + MachineInstr::FrameSetup, getStackAlign()); + + if (EmitCFI) { + // Emit ".cfi_def_cfa_offset Offset" + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } @@ -716,11 +815,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, getPushOrLibCallsSavedInfo(MF, CSI)); } - if (StackSize != 0) { - // Allocate space on the stack if necessary. - allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-StackSize), - RealStackSize, /*EmitCFI=*/true); - } + // Allocate space on the stack if necessary. + auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); + const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); + bool NeedProbe = TLI->hasInlineStackProbe(MF); + uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign()); + if (StackSize != 0) + allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true, + NeedProbe, ProbeSize); // The frame pointer is callee-saved, and code has been generated for us to // save it to the stack. 
We need to skip over the storing of callee-saved @@ -761,8 +863,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); - allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-SecondSPAdjustAmount), - getStackSizeWithRVVPadding(MF), !hasFP(MF)); + allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount, + getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe, + ProbeSize); } if (RVVStackSize) { @@ -1910,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { return TargetStackID::ScalableVector; } + +// Synthesize the probe loop. +static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) { + + auto &Subtarget = MF.getSubtarget<RISCVSubtarget>(); + const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); + bool IsRV64 = Subtarget.is64Bit(); + Align StackAlign = Subtarget.getFrameLowering()->getStackAlign(); + const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); + uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign); + + MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *LoopTestMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.insert(MBBInsertPoint, LoopTestMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.insert(MBBInsertPoint, ExitMBB); + MachineInstr::MIFlag Flags = MachineInstr::FrameSetup; + Register TargetReg = RISCV::X6; + Register ScratchReg = RISCV::X7; + + // ScratchReg = ProbeSize + TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags); + + // LoopTest: + // SUB SP, SP, ProbeSize + BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg) + .addReg(SPReg) + .addReg(ScratchReg) + .setMIFlags(Flags); + + // s[d|w] zero, 0(sp) + BuildMI(*LoopTestMBB, 
LoopTestMBB->end(), DL, + TII->get(IsRV64 ? RISCV::SD : RISCV::SW)) + .addReg(RISCV::X0) + .addReg(SPReg) + .addImm(0) + .setMIFlags(Flags); + + // BNE SP, TargetReg, LoopTest + BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE)) + .addReg(SPReg) + .addReg(TargetReg) + .addMBB(LoopTestMBB) + .setMIFlags(Flags); + + ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end()); + + LoopTestMBB->addSuccessor(ExitMBB); + LoopTestMBB->addSuccessor(LoopTestMBB); + MBB.addSuccessor(LoopTestMBB); +} + +void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &MBB) const { + auto Where = llvm::find_if(MBB, [](MachineInstr &MI) { + return MI.getOpcode() == RISCV::PROBED_STACKALLOC; + }); + if (Where != MBB.end()) { + DebugLoc DL = MBB.findDebugLoc(Where); + emitStackProbeInline(MF, MBB, Where, DL); + Where->eraseFromParent(); + } +} diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index ac0c805c744d63..190c063d9d3b5d 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -79,8 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering { } void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineFunction &MF, StackOffset Offset, - uint64_t RealStackSize, bool EmitCFI) const; + MachineFunction &MF, uint64_t Offset, + uint64_t RealStackSize, bool EmitCFI, bool NeedProbe, + uint64_t ProbeSize) const; protected: const RISCVSubtarget &STI; @@ -103,6 +104,9 @@ class RISCVFrameLowering : public TargetFrameLowering { std::pair<int64_t, Align> assignRVVStackObjectOffsets(MachineFunction &MF) const; + // Replace a StackProbe stub (if any) with the actual probe code inline + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const override; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 
46dedcc3e09cf2..c6838573637202 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -22350,3 +22350,25 @@ namespace llvm::RISCVVIntrinsicsTable { #include "RISCVGenSearchableTables.inc" } // namespace llvm::RISCVVIntrinsicsTable + +bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { + + // If the function specifically requests inline stack probes, emit them. + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == + "inline-asm"; + + return false; +} + +unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF, + Align StackAlign) const { + // The default stack probe size is 4096 if the function has no + // stack-probe-size attribute. + const Function &Fn = MF.getFunction(); + unsigned StackProbeSize = + Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096); + // Round down to the stack alignment. + StackProbeSize = alignDown(StackProbeSize, StackAlign.value()); + return StackProbeSize ? StackProbeSize : StackAlign.value(); +} diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index bb0d9a71abf7e6..778e38a1a834ee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -919,6 +919,11 @@ class RISCVTargetLowering : public TargetLowering { MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override; + /// True if stack clash protection is enabled for this functions. 
+ bool hasInlineStackProbe(const MachineFunction &MF) const override; + + unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index cad9f5e3790be1..14b571cebe1fec 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd), def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12), (ADDI GPR:$rs1, simm12:$imm12)>; +/// Stack probing + +let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +// Probed stack allocation of a constant size, used in function prologues when +// stack-clash protection is enabled. +def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp), + (ins GPR:$scratch), + []>, + Sched<[]>; +} + /// HI and ADD_LO address nodes. // Pseudo for a rematerializable LUI+ADDI sequence for loading an address. diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp index d0c363042f5118..a0d79317638184 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "RISCVMachineFunctionInfo.h" +#include "llvm/IR/Module.h" using namespace llvm; @@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone( return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this); } +RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F, + const RISCVSubtarget *STI) { + + // The default stack probe size is 4096 if the function has no + // stack-probe-size attribute. This is a safe default because it is the + // smallest possible guard page size. 
+ uint64_t ProbeSize = 4096; + if (F.hasFnAttribute("stack-probe-size")) + ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size"); + else if (const auto *PS = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag("stack-probe-size"))) + ProbeSize = PS->getZExtValue(); + assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size"); + + // Round down to the stack alignment. + uint64_t StackAlign = + STI->getFrameLowering()->getTransientStackAlign().value(); + ProbeSize = std::max(StackAlign, alignDown(ProbeSize, StackAlign)); + StringRef ProbeKind; + if (F.hasFnAttribute("probe-stack")) + ProbeKind = F.getFnAttribute("probe-stack").getValueAsString(); + else if (const auto *PS = dyn_cast_or_null<MDString>( + F.getParent()->getModuleFlag("probe-stack"))) + ProbeKind = PS->getString(); + if (ProbeKind.size()) { + StackProbeSize = ProbeSize; + } +} + void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this); } diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index 779c652b4d8fc4..8909f2f3bd3170 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { unsigned RVPushRegs = 0; int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST; + int64_t StackProbeSize = 0; + public: - RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {} + RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI); MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index c5847d8bfacb42..dcd3598f658f6a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -271,8 +271,8 
@@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo( BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const { - return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator, - F, STI); + return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>( + Allocator, F, static_cast<const RISCVSubtarget *>(STI)); } TargetTransformInfo diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll new file mode 100644 index 00000000000000..3b2d7f1f9a8ea4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue-nounwind.ll @@ -0,0 +1,345 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I + +; Tests copied from PowerPC. 
+ +; Free probe +define i8 @f0() #0 nounwind { +; RV64I-LABEL: f0: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 0(sp) +; RV64I-NEXT: lbu a0, 0(sp) +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f0: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 0(sp) +; RV32I-NEXT: lbu a0, 0(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 64 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f1() #0 nounwind { +; RV64I-LABEL: f1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 4096 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f2() #0 nounwind { +; RV64I-LABEL: f2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: lui t2, 1 +; RV64I-NEXT: .LBB2_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB2_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; 
RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: lui t2, 1 +; RV32I-NEXT: .LBB2_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB2_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f3() #0 "stack-probe-size"="32768" nounwind { +; RV64I-LABEL: f3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 8 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: lui a0, 8 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 8 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: lui a0, 8 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +; Same as f2, but without 
protection. +define i8 @f4() nounwind { +; RV64I-LABEL: f4: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw a0, a0, 16 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi a0, a0, 16 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f5() #0 "stack-probe-size"="65536" nounwind { +; RV64I-LABEL: f5: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 256 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: lui t2, 16 +; RV64I-NEXT: .LBB5_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB5_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 256 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 256 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: lui t2, 16 +; RV32I-NEXT: .LBB5_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB5_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 256 +; RV32I-NEXT: addi a1, a1, 16 
+; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1048576 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f6() #0 nounwind { +; RV64I-LABEL: f6: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 262144 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: lui t2, 1 +; RV64I-NEXT: .LBB6_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB6_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 262144 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 262144 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: lui t2, 1 +; RV32I-NEXT: .LBB6_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB6_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 262144 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1073741824 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f7() #0 "stack-probe-size"="65536" nounwind { +; RV64I-LABEL: f7: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 244128 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: lui t2, 16 +; RV64I-NEXT: .LBB7_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB7_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: lui a0, 13 +; 
RV64I-NEXT: addiw a0, a0, -1520 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 9(sp) +; RV64I-NEXT: lbu a0, 9(sp) +; RV64I-NEXT: lui a1, 244141 +; RV64I-NEXT: addiw a1, a1, -1520 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f7: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 244128 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: lui t2, 16 +; RV32I-NEXT: .LBB7_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB7_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: lui a0, 13 +; RV32I-NEXT: addi a0, a0, -1520 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 9(sp) +; RV32I-NEXT: lbu a0, 9(sp) +; RV32I-NEXT: lui a1, 244141 +; RV32I-NEXT: addi a1, a1, -1520 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1000000007 + %b = getelementptr inbounds i8, ptr %a, i64 101 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +attributes #0 = { "probe-stack"="inline-asm" } diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll new file mode 100644 index 00000000000000..18af080e86747b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll @@ -0,0 +1,541 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I + +; Tests copied from PowerPC. 
+ +; Free probe +define i8 @f0() #0 { +; RV64I-LABEL: f0: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: .cfi_def_cfa_offset 64 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 0(sp) +; RV64I-NEXT: lbu a0, 0(sp) +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f0: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: .cfi_def_cfa_offset 64 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 0(sp) +; RV32I-NEXT: lbu a0, 0(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 64 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f1() #0 { +; RV64I-LABEL: f1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: .cfi_def_cfa_offset 4096 +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 4112 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: .cfi_def_cfa_offset 4096 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 4112 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 4096 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f2() #0 { +; RV64I-LABEL: f2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 16 
+; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: .cfi_def_cfa t1, 65536 +; RV64I-NEXT: lui t2, 1 +; RV64I-NEXT: .LBB2_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB2_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: .cfi_def_cfa_register sp +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 65552 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: .cfi_def_cfa t1, 65536 +; RV32I-NEXT: lui t2, 1 +; RV32I-NEXT: .LBB2_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB2_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: .cfi_def_cfa_register sp +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 65552 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f3() #0 "stack-probe-size"="32768" { +; RV64I-LABEL: f3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 8 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: .cfi_def_cfa_offset 32768 +; RV64I-NEXT: lui a0, 8 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: .cfi_def_cfa_offset 65536 +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 65552 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 
16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 8 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: .cfi_def_cfa_offset 32768 +; RV32I-NEXT: lui a0, 8 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: .cfi_def_cfa_offset 65536 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 65552 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +; Same as f2, but without protection. +define i8 @f4() { +; RV64I-LABEL: f4: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw a0, a0, 16 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 65552 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi a0, a0, 16 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_def_cfa_offset 65552 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 65536 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f5() #0 
"stack-probe-size"="65536" { +; RV64I-LABEL: f5: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 256 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: .cfi_def_cfa t1, 1048576 +; RV64I-NEXT: lui t2, 16 +; RV64I-NEXT: .LBB5_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB5_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: .cfi_def_cfa_register sp +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 1048592 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 256 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 256 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: .cfi_def_cfa t1, 1048576 +; RV32I-NEXT: lui t2, 16 +; RV32I-NEXT: .LBB5_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB5_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: .cfi_def_cfa_register sp +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 1048592 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 256 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1048576 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f6() #0 { +; RV64I-LABEL: f6: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 262144 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: .cfi_def_cfa t1, 1073741824 +; RV64I-NEXT: lui t2, 1 +; RV64I-NEXT: .LBB6_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; 
RV64I-NEXT: bne sp, t1, .LBB6_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: .cfi_def_cfa_register sp +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 1073741840 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 16(sp) +; RV64I-NEXT: lbu a0, 16(sp) +; RV64I-NEXT: lui a1, 262144 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 262144 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: .cfi_def_cfa t1, 1073741824 +; RV32I-NEXT: lui t2, 1 +; RV32I-NEXT: .LBB6_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB6_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: .cfi_def_cfa_register sp +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 1073741840 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 16(sp) +; RV32I-NEXT: lbu a0, 16(sp) +; RV32I-NEXT: lui a1, 262144 +; RV32I-NEXT: addi a1, a1, 16 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1073741824 + %b = getelementptr inbounds i8, ptr %a, i64 63 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +define i8 @f7() #0 "stack-probe-size"="65536" { +; RV64I-LABEL: f7: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 244128 +; RV64I-NEXT: sub t1, sp, a0 +; RV64I-NEXT: .cfi_def_cfa t1, 999948288 +; RV64I-NEXT: lui t2, 16 +; RV64I-NEXT: .LBB7_1: # %entry +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sub sp, sp, t2 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: bne sp, t1, .LBB7_1 +; RV64I-NEXT: # %bb.2: # %entry +; RV64I-NEXT: .cfi_def_cfa_register sp +; RV64I-NEXT: lui a0, 13 +; RV64I-NEXT: addiw a0, a0, -1520 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 1000000016 +; RV64I-NEXT: li a0, 3 +; RV64I-NEXT: sb a0, 9(sp) +; 
RV64I-NEXT: lbu a0, 9(sp) +; RV64I-NEXT: lui a1, 244141 +; RV64I-NEXT: addiw a1, a1, -1520 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f7: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, 244128 +; RV32I-NEXT: sub t1, sp, a0 +; RV32I-NEXT: .cfi_def_cfa t1, 999948288 +; RV32I-NEXT: lui t2, 16 +; RV32I-NEXT: .LBB7_1: # %entry +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: sub sp, sp, t2 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: bne sp, t1, .LBB7_1 +; RV32I-NEXT: # %bb.2: # %entry +; RV32I-NEXT: .cfi_def_cfa_register sp +; RV32I-NEXT: lui a0, 13 +; RV32I-NEXT: addi a0, a0, -1520 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_def_cfa_offset 1000000016 +; RV32I-NEXT: li a0, 3 +; RV32I-NEXT: sb a0, 9(sp) +; RV32I-NEXT: lbu a0, 9(sp) +; RV32I-NEXT: lui a1, 244141 +; RV32I-NEXT: addi a1, a1, -1520 +; RV32I-NEXT: add sp, sp, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +entry: + %a = alloca i8, i64 1000000007 + %b = getelementptr inbounds i8, ptr %a, i64 101 + store volatile i8 3, ptr %a + %c = load volatile i8, ptr %a + ret i8 %c +} + +; alloca + align < probe_size +define i32 @f8(i64 %i) local_unnamed_addr #0 { +; RV64I-LABEL: f8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -832 +; RV64I-NEXT: .cfi_def_cfa_offset 832 +; RV64I-NEXT: sd ra, 824(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 816(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 832 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: andi sp, sp, -64 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: sw a1, 0(a0) +; RV64I-NEXT: lw a0, 0(sp) +; RV64I-NEXT: addi sp, s0, -832 +; RV64I-NEXT: .cfi_def_cfa sp, 832 +; RV64I-NEXT: ld ra, 824(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 816(sp) # 8-byte Folded Reload +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: 
.cfi_restore s0 +; RV64I-NEXT: addi sp, sp, 832 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -832 +; RV32I-NEXT: .cfi_def_cfa_offset 832 +; RV32I-NEXT: sw ra, 828(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 824(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 832 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: andi sp, sp, -64 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: addi sp, s0, -832 +; RV32I-NEXT: .cfi_def_cfa sp, 832 +; RV32I-NEXT: lw ra, 828(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 824(sp) # 4-byte Folded Reload +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: addi sp, sp, 832 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret + %a = alloca i32, i32 200, align 64 + %b = getelementptr inbounds i32, ptr %a, i64 %i + store volatile i32 1, ptr %b + %c = load volatile i32, ptr %a + ret i32 %c +} + +; alloca > probe_size, align > probe_size +define i32 @f9(i64 %i) local_unnamed_addr #0 { +; RV64I-LABEL: f9: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 2032 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: sub sp, sp, a1 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sub sp, sp, a1 +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: andi sp, sp, -2048 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: addi a1, sp, 2047 +; RV64I-NEXT: addi a1, a1, 1 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: sw a1, 0(a0) +; RV64I-NEXT: lui 
a0, 1 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: lw a0, -2048(a0) +; RV64I-NEXT: addi sp, s0, -2032 +; RV64I-NEXT: .cfi_def_cfa sp, 2032 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV32I-LABEL: f9: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 2032 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: sub sp, sp, a1 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sub sp, sp, a1 +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: andi sp, sp, -2048 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: addi a1, sp, 2047 +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: li a1, 1 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw a0, -2048(a0) +; RV32I-NEXT: addi sp, s0, -2032 +; RV32I-NEXT: .cfi_def_cfa sp, 2032 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret + %a = alloca i32, i32 2000, align 2048 + %b = getelementptr inbounds i32, ptr %a, i64 %i + store volatile i32 1, ptr %b + %c = load volatile i32, ptr %a + ret i32 %c +} + +attributes #0 = { "probe-stack"="inline-asm" } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits