llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

<details>
<summary>Changes</summary>

f314e12 uses `requiresSaveVG`, which was introduced in 334a366ba792 and is also missing from the release/19.x branch. I figured it made sense to cherry-pick that one as well.

---

Full diff: https://github.com/llvm/llvm-project/pull/117695.diff

4 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+19-12)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11-8)
- (added) llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll (+161)
- (modified) llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll (+38)

``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd6..9d16c92b3fea5c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
 }
 
+static bool requiresSaveVG(MachineFunction &MF) {
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+  // is enabled with streaming mode changes.
+  if (!AFI->hasStreamingModeChanges())
+    return false;
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (ST.isTargetDarwin())
+    return ST.hasSVE();
+  return true;
+}
+
 bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
   unsigned Opc = MBBI->getOpcode();
   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   // functions, we need to do this for both the streaming and non-streaming
   // vector length. Move past these instructions if necessary.
   MachineFunction &MF = *MBB.getParent();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  if (AFI->hasStreamingModeChanges())
+  if (requiresSaveVG(MF))
     while (isVGInstruction(MBBI))
       ++MBBI;
 
@@ -1936,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // pointer bump above.
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
-    // Move past instructions generated to calculate VG
-    if (AFI->hasStreamingModeChanges())
-      while (isVGInstruction(MBBI))
-        ++MBBI;
-
-    if (CombineSPBump)
+    if (CombineSPBump &&
+        // Only fix-up frame-setup load/store instructions.
+        (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
     ++MBBI;
@@ -3720,7 +3728,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // non-streaming VG value.
   const Function &F = MF.getFunction();
   SMEAttrs Attrs(F);
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
       CSStackSize += 16;
     else
@@ -3873,7 +3881,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
   }
 
   // Insert VG into the list of CSRs, immediately before LR if saved.
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     std::vector<CalleeSavedInfo> VGSaves;
     SMEAttrs Attrs(MF.getFunction());
 
@@ -4602,10 +4610,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
 
 void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS = nullptr) const {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   for (auto &BB : MF)
     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
-      if (AFI->hasStreamingModeChanges())
+      if (requiresSaveVG(MF))
         II = emitVGSaveRestore(II, this);
       if (StackTaggingMergeSetTag)
         II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b18..ef2789e96213b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SDValue InGlue;
   if (RequiresSMChange) {
-
-    Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
-                        DAG.getVTList(MVT::Other, MVT::Glue), Chain);
-    InGlue = Chain.getValue(1);
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+                          DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+      InGlue = Chain.getValue(1);
+    }
 
     SDValue NewChain = changeStreamingMode(
         DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Result = changeStreamingMode(
         DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
         getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
-    InGlue = Result.getValue(1);
 
-    Result =
-        DAG.getNode(AArch64ISD::VG_RESTORE, DL,
-                    DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      InGlue = Result.getValue(1);
+      Result =
+          DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+                      DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    }
   }
 
   if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 00000000000000..36a300fea25e5a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    bl __ZL9sme_crashv
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov w0, #0 ; =0x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+entry:
+  tail call fastcc void @_ZL9sme_crashv() #4
+  ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w27, -24
+; CHECK-NEXT:    .cfi_offset w28, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    sub x9, sp, #160
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    str x8, [sp, #152]
+; CHECK-NEXT:    mov z0.b, #0 ; =0x0
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [sp, #152]
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.ne LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %entry
+; CHECK-NEXT:    sub sp, x29, #80
+; CHECK-NEXT:    .cfi_def_cfa wsp, 96
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_2: ; %entry
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl ___stack_chk_fail
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+  %uu = alloca [16 x float], align 256
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+  call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c99..38666a05c20f8c 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
   ret void
 }
 
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK:       // %bb.0:
+; NO-SVE-CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    mov x9, x0
+; NO-SVE-CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    bl __arm_get_current_vg
+; NO-SVE-CHECK-NEXT:    str x0, [sp, #80] // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT:    mov x0, x9
+; NO-SVE-CHECK-NEXT:    rdsvl x8, #1
+; NO-SVE-CHECK-NEXT:    add x29, sp, #64
+; NO-SVE-CHECK-NEXT:    lsr x8, x8, #3
+; NO-SVE-CHECK-NEXT:    mov x1, x0
+; NO-SVE-CHECK-NEXT:    smstart sm
+; NO-SVE-CHECK-NEXT:    mov x0, x8
+; NO-SVE-CHECK-NEXT:    bl bar
+; NO-SVE-CHECK-NEXT:    smstop sm
+; NO-SVE-CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT:    ret
+  %some_alloc = alloca i64, align 8
+  %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+  call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+  ret void
+}
+
+declare void @bar(i64, i64)
+
 ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
 ; if the function contains a streaming-mode change.
``````````

</details>

https://github.com/llvm/llvm-project/pull/117695
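For readers skimming the patch, the behavioral change reduces to a small decision table: VG is spilled only around streaming-mode changes, and on Darwin only when SVE (not merely SME) is available. Below is a minimal standalone sketch of that table in plain C++, with booleans standing in for the subtarget queries `hasStreamingModeChanges()`, `isTargetDarwin()`, and `hasSVE()`; it illustrates the new predicate's logic and is not the LLVM API itself:

```cpp
#include <cstdio>

// Stand-in for the requiresSaveVG() predicate added by this patch:
// VG is saved only for functions with streaming-mode changes, and on
// Darwin only when SVE (not merely SME) is available.
static bool requiresSaveVG(bool hasStreamingModeChanges, bool isTargetDarwin,
                           bool hasSVE) {
  if (!hasStreamingModeChanges)
    return false;
  if (isTargetDarwin)
    return hasSVE;
  return true;
}

int main() {
  // Print the full truth table of the predicate.
  std::puts("smchange darwin sve -> save VG");
  for (int SM = 0; SM <= 1; ++SM)
    for (int Darwin = 0; Darwin <= 1; ++Darwin)
      for (int SVE = 0; SVE <= 1; ++SVE)
        std::printf("%8d %6d %3d -> %d\n", SM, Darwin, SVE,
                    requiresSaveVG(SM, Darwin, SVE));
  return 0;
}
```

The Darwin-with-SME-but-no-SVE row is the case the new sme-darwin-no-sve-vg.ll test pins down: the call still gets its smstart/smstop pair, but no VG save/restore is emitted.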