================ @@ -9460,6 +9461,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable( return TargetInstrInfo::isReallyTriviallyReMaterializable(MI); } +MachineBasicBlock::iterator +AArch64InstrInfo::insertStackProbingLoop(MachineBasicBlock::iterator MBBI, + Register ScratchReg, + Register TargetReg) const { + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineFunction &MF = *MBB.getParent(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); + int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize(); + DebugLoc DL = MBB.findDebugLoc(MBBI); + + MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *LoopTestMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.insert(MBBInsertPoint, LoopTestMBB); + MachineBasicBlock *LoopBodyMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.insert(MBBInsertPoint, LoopBodyMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.insert(MBBInsertPoint, ExitMBB); + + // LoopTest: + // SUB ScratchReg, ScratchReg, #ProbeSize + emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg, + StackOffset::getFixed(-ProbeSize), TII, + MachineInstr::FrameSetup); + + // CMP ScratchReg, TargetReg + AArch64CC::CondCode Cond = AArch64CC::LE; + Register Op1 = ScratchReg; + Register Op2 = TargetReg; + if (Op2 == AArch64::SP) { + assert(Op1 != AArch64::SP && "At most one of the registers can be SP"); + // CMP TargetReg, ScratchReg + std::swap(Op1, Op2); + Cond = AArch64CC::GT; + } + BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64), + AArch64::XZR) + .addReg(Op1) + .addReg(Op2) + .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) + .setMIFlags(MachineInstr::FrameSetup); + + // B.<Cond> LoopExit + BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc)) + .addImm(Cond) + .addMBB(ExitMBB) + .setMIFlags(MachineInstr::FrameSetup); + + // STR XZR, [ScratchReg] + BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui)) + .addReg(AArch64::XZR) + .addReg(ScratchReg) + .addImm(0) + .setMIFlags(MachineInstr::FrameSetup); + + // B loop + BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B)) + .addMBB(LoopTestMBB) + .setMIFlags(MachineInstr::FrameSetup); + + // LoopExit: + // STR XZR, [TargetReg] + BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui)) + .addReg(AArch64::XZR) + .addReg(TargetReg) + .addImm(0) + .setMIFlags(MachineInstr::FrameSetup); ---------------- momchil-velikov wrote:
> ``` > sub sp, sp, #0x1, lsl #0xc > cmp sp, x1 > b.le 0x5555557388 > str xzr, [x1] {0x0} > ``` > > We are probing the _old_ stack head! `x1` contains `0x7fffffee80` but `sp` is > at `7fffffde80`! This means that the selection of the `x1` register instead > of `sp` is incorrect. I can't quite see how it is possible to generate this code. This is part of the sequence for allocating a compile time unknown amount of stack space that is done by `AArch64InstrInfo::insertStackProbingLoop`. In this function `TargetReg` is the new top of the stack and right now [1] `ScratchReg` is always `AArch64::SP` . Thus first we have ``` // LoopTest: // SUB ScratchReg, ScratchReg, #ProbeSize emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg, StackOffset::getFixed(-ProbeSize), TII, MachineInstr::FrameSetup); ``` This is the code the emits the ` su sp, sp, #0x1, lsl #0xc`. Note, it uses `ScratchReg`. Then we emit the compare ``` // CMP ScratchReg, TargetReg AArch64CC::CondCode Cond = AArch64CC::LE; Register Op1 = ScratchReg; Register Op2 = TargetReg; if (Op2 == AArch64::SP) { // condition is false here // ... } BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64), AArch64::XZR) .addReg(Op1) .addReg(Op2) .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) .setMIFlags(MachineInstr::FrameSetup); ``` That is the `cmp sp, x1`. So, `Op2` is `TargetReg` and `TargetReg` is `x1`. Then we emit the loop exit branch: ``` // B.<Cond> LoopExit BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc)) .addImm(Cond) .addMBB(ExitMBB) .setMIFlags(MachineInstr::FrameSetup); ``` This is the `b.le 0x5555557388` above. and then, still inside the probing loop, we emit a stack probe to `ScratchReg`, i.e. to `SP`. ``` // STR XZR, [ScratchReg] BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui)) .addReg(AArch64::XZR) .addReg(ScratchReg) .addImm(0) .setMIFlags(MachineInstr::FrameSetup); ``` However, in your assembly snippet I see ` str xzr, [x1] {0x0}`, i.e. a store to `TargetReg`. That looks impossible to happen with the current source. That part of the source that you changed is the final probe to `TargetReg` after the loop ``` // B loop BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B)) .addMBB(LoopTestMBB) .setMIFlags(MachineInstr::FrameSetup); // LoopExit: // STR XZR, [TargetReg] BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui)) .addReg(AArch64::XZR) .addReg(TargetReg) .addImm(0) .setMIFlags(MachineInstr::FrameSetup); ``` You can see, it's preceded by an unconditional branch, so it's not the code that generates the stack probe instruction in you assembly snippet. Having said that, this is indeed a bug!!! Although it might not be the same bug you're seeing. The instruction should issue a probe to the new top of the stack. And it indeed does so, in the sense it writes to the correct address. **However, the stack pointer should have been already set to that address.** It is set immediately after that instruction, but it should be set before that instruction. I'll prepare a fix (may take 3-4 days) and then we'll see. Thanks for looking at it, much appreciated!!! [1] the function is slightly more flexible. https://github.com/llvm/llvm-project/pull/66524 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits