Author: Benjamin Maxwell Date: 2025-12-17T10:14:53Z New Revision: 90adfb2774d2815f86ee2c8ceef05f547e8f2dd9
URL: https://github.com/llvm/llvm-project/commit/90adfb2774d2815f86ee2c8ceef05f547e8f2dd9 DIFF: https://github.com/llvm/llvm-project/commit/90adfb2774d2815f86ee2c8ceef05f547e8f2dd9.diff LOG: [AArch64][SME] Add pass remarks to the MachineSMEABIPass (#170277) This patch implements SME ABI pass remarks for when: * a lazy ZA save is set up * a full/agnostic ZA save is set up * a ZT0 spill is emitted These remarks can be enabled with `--pass-remarks-analysis=sme` (like the current SME remarks). The remarks work by noting where the save/spill was emitted, then pointing out calls later in the function that require the save. The remarks stop at the first call found along any path. For example: ``` 7: void merge_paths(bool a) __arm_inout("za") { 8: if (a) 9: private_za_callee_a(); 10: else 11: private_za_callee_b(); 12: private_za_callee_c(); // no remark (does not require a new save) 13: } ``` Results in the following remarks: ``` remark: foo.c:8:7: lazy save of ZA emitted in 'merge_paths' remark: foo.c:9:5: call to 'private_za_callee_a' requires ZA save remark: foo.c:11:5: call to 'private_za_callee_b' requires ZA save ``` Added: clang/test/CodeGen/AArch64/sme-remarks.c llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll Modified: llvm/lib/Target/AArch64/MachineSMEABIPass.cpp Removed: llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll ################################################################################ diff --git a/clang/test/CodeGen/AArch64/sme-remarks.c b/clang/test/CodeGen/AArch64/sme-remarks.c new file mode 100644 index 0000000000000..fd144b8a6c425 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme-remarks.c @@ -0,0 +1,41 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -Rpass-analysis=sme -verify %s -S -o /dev/null +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -mllvm -aarch64-new-sme-abi -Rpass-analysis=sme -verify=expected-new %s -S -o /dev/null %s + +void private_za_callee_a(); +void private_za_callee_b(); +void private_za_callee_c(); + +void test_za_merge_paths(int a) __arm_inout("za") { + // expected-new-remark@+1 {{lazy save of ZA emitted in 'test_za_merge_paths'}} + if (a != 0) + // expected-remark@+2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}} + // expected-new-remark@+1 {{call to 'private_za_callee_a' requires ZA save}} + private_za_callee_a(); + else + // expected-remark@+2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}} + // expected-new-remark@+1 {{call to 'private_za_callee_b' requires ZA save}} + private_za_callee_b(); + // expected-remark@+3 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}} + /// The new lowering won't report this call as the save is already needed due + /// to the call to `private_za_callee_a/b()` calls on both paths to this call. + private_za_callee_c(); +} + +void test_lazy_save_multiple_paths(int a) __arm_inout("za") { + // expected-new-remark@+1 {{lazy save of ZA emitted in 'test_lazy_save_multiple_paths'}} + if (a != 0) + // expected-remark@+2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}} + // expected-new-remark@+1 {{call to 'private_za_callee_a' requires ZA save}} + private_za_callee_a(); + else { + // expected-remark@+2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}} + // expected-new-remark@+1 {{call to 'private_za_callee_b' requires ZA save}} + private_za_callee_b(); + // expected-remark@+3 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}} + /// The new lowering won't report this call as the save is already needed + /// due to the call to `private_za_callee_b()`. + private_za_callee_c(); + } +} diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index b3e1ddbb91f79..823c754a0ac05 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -63,6 +63,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -303,6 +304,7 @@ struct MachineSMEABI : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<EdgeBundlesWrapperLegacy>(); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -391,6 +393,19 @@ struct MachineSMEABI : public MachineFunctionPass { return emitAllocateLazySaveBuffer(Context, MBB, MBBI); } + /// Collects the reachable calls from \p MBBI marked with \p Marker. This is + /// intended to be used to emit lazy save remarks. Note: This stops at the + /// first marked call along any path. + void collectReachableMarkedCalls(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI, + SmallVectorImpl<const MachineInstr *> &Calls, + unsigned Marker) const; + + void emitCallSaveRemarks(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI, DebugLoc DL, + unsigned Marker, StringRef RemarkName, + StringRef SaveName) const; + /// Save live physical registers to virtual registers. PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL); @@ -406,6 +421,8 @@ struct MachineSMEABI : public MachineFunctionPass { const AArch64RegisterInfo *TRI = nullptr; const AArch64FunctionInfo *AFI = nullptr; const TargetInstrInfo *TII = nullptr; + + MachineOptimizationRemarkEmitter *ORE = nullptr; MachineRegisterInfo *MRI = nullptr; MachineLoopInfo *MLI = nullptr; }; @@ -706,9 +723,95 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, static DebugLoc getDebugLoc(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - if (MBBI != MBB.end()) - return MBBI->getDebugLoc(); - return DebugLoc(); + if (MBB.empty()) + return DebugLoc(); + return MBBI != MBB.end() ? MBBI->getDebugLoc() : MBB.back().getDebugLoc(); +} + +/// Finds the first call (as determined by MachineInstr::isCall()) starting from +/// \p MBBI in \p MBB marked with \p Marker (which is a marker opcode such as +/// RequiresZASavePseudo). If a marked call is found, it is pushed to \p Calls +/// and the function returns true. +static bool findMarkedCall(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI, + SmallVectorImpl<const MachineInstr *> &Calls, + unsigned Marker, unsigned CallDestroyOpcode) { + auto IsMarker = [&](auto &MI) { return MI.getOpcode() == Marker; }; + auto MarkerInst = std::find_if(MBBI, MBB.end(), IsMarker); + if (MarkerInst == MBB.end()) + return false; + MachineBasicBlock::const_iterator I = MarkerInst; + while (++I != MBB.end()) { + if (I->isCall() || I->getOpcode() == CallDestroyOpcode) + break; + } + if (I != MBB.end() && I->isCall()) + Calls.push_back(&*I); + // Note: This function always returns true if a "Marker" was found. + return true; +} + +void MachineSMEABI::collectReachableMarkedCalls( + const MachineBasicBlock &StartMBB, + MachineBasicBlock::const_iterator StartInst, + SmallVectorImpl<const MachineInstr *> &Calls, unsigned Marker) const { + assert(Marker == AArch64::InOutZAUsePseudo || + Marker == AArch64::RequiresZASavePseudo || + Marker == AArch64::RequiresZT0SavePseudo); + unsigned CallDestroyOpcode = TII->getCallFrameDestroyOpcode(); + if (findMarkedCall(StartMBB, StartInst, Calls, Marker, CallDestroyOpcode)) + return; + + SmallPtrSet<const MachineBasicBlock *, 4> Visited; + SmallVector<const MachineBasicBlock *> Worklist(StartMBB.succ_rbegin(), + StartMBB.succ_rend()); + while (!Worklist.empty()) { + const MachineBasicBlock *MBB = Worklist.pop_back_val(); + auto [_, Inserted] = Visited.insert(MBB); + if (!Inserted) + continue; + + if (!findMarkedCall(*MBB, MBB->begin(), Calls, Marker, CallDestroyOpcode)) + Worklist.append(MBB->succ_rbegin(), MBB->succ_rend()); + } +} + +static StringRef getCalleeName(const MachineInstr &CallInst) { + assert(CallInst.isCall() && "expected a call"); + for (const MachineOperand &MO : CallInst.operands()) { + if (MO.isSymbol()) + return MO.getSymbolName(); + if (MO.isGlobal()) + return MO.getGlobal()->getName(); + } + return {}; +} + +void MachineSMEABI::emitCallSaveRemarks(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI, + DebugLoc DL, unsigned Marker, + StringRef RemarkName, + StringRef SaveName) const { + auto SaveRemark = [&](DebugLoc DL, const MachineBasicBlock &MBB) { + return MachineOptimizationRemarkAnalysis("sme", RemarkName, DL, &MBB); + }; + StringRef StateName = Marker == AArch64::RequiresZT0SavePseudo ? "ZT0" : "ZA"; + ORE->emit([&] { + return SaveRemark(DL, MBB) << SaveName << " of " << StateName + << " emitted in '" << MF->getName() << "'"; + }); + if (!ORE->allowExtraAnalysis("sme")) + return; + SmallVector<const MachineInstr *> CallsRequiringSaves; + collectReachableMarkedCalls(MBB, MBBI, CallsRequiringSaves, Marker); + for (const MachineInstr *CallInst : CallsRequiringSaves) { + auto R = SaveRemark(CallInst->getDebugLoc(), *CallInst->getParent()); + R << "call"; + if (StringRef CalleeName = getCalleeName(*CallInst); !CalleeName.empty()) + R << " to '" << CalleeName << "'"; + R << " requires " << StateName << " save"; + ORE->emit(R); + } } void MachineSMEABI::emitSetupLazySave(EmitContext &Context, @@ -716,6 +819,9 @@ void MachineSMEABI::emitSetupLazySave(EmitContext &Context, MachineBasicBlock::iterator MBBI) { DebugLoc DL = getDebugLoc(MBB, MBBI); + emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo, + "SMELazySaveZA", "lazy save"); + // Get pointer to TPIDR2 block. Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass); Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass); @@ -926,12 +1032,17 @@ void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs, bool IsSave) { auto *TLI = Subtarget->getTargetLowering(); + DebugLoc DL = getDebugLoc(MBB, MBBI); - Register BufferPtr = AArch64::X0; + + if (IsSave) + emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZASavePseudo, + "SMEFullZASave", "full save"); PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL); // Copy the buffer pointer into X0. + Register BufferPtr = AArch64::X0; BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr) .addReg(Context.getAgnosticZABufferPtr(*MF)); @@ -952,6 +1063,14 @@ void MachineSMEABI::emitZT0SaveRestore(EmitContext &Context, MachineBasicBlock::iterator MBBI, bool IsSave) { DebugLoc DL = getDebugLoc(MBB, MBBI); + + // Note: This will report calls that _only_ need ZT0 saved. Call that save + // both ZA and ZT0 will be under the SMELazySaveZA remark. This prevents + // reporting the same calls twice. + if (IsSave) + emitCallSaveRemarks(MBB, MBBI, DL, AArch64::RequiresZT0SavePseudo, + "SMEZT0Save", "spill"); + Register ZT0Save = MRI->createVirtualRegister(&AArch64::GPR64spRegClass); BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), ZT0Save) @@ -1139,6 +1258,7 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; Subtarget = &MF.getSubtarget<AArch64Subtarget>(); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); TII = Subtarget->getInstrInfo(); TRI = Subtarget->getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll new file mode 100644 index 0000000000000..755dcfbf17ba4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+sme2 --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+sme2 --aarch64-new-sme-abi --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NEWLOWERING + +declare void @private_za_callee() +declare void @private_za_callee_a() +declare void @private_za_callee_b() +declare void @private_za_callee_c() + +declare void @shared_za_callee() "aarch64_inout_za" +declare void @shared_za_zt0_callee() "aarch64_inout_za" "aarch64_inout_zt0" + +; Note: These remarks are more useful with source debug info (which gives line numbers for `<unknown>:0:0`). + +define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_1_callee' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save + call void @private_za_callee() + ret void +} + +define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_2_callees' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save + call void @private_za_callee() + call void @private_za_callee() + ret void +} + +define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" { +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_expanded_intrinsic' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'cosf' requires ZA save + %res = call float @llvm.cos.f32(float %a) + ret float %res +} + +define void @test_lazy_save_multiple_paths(i1 %a) "aarch64_inout_za" { +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_a' sets up a lazy save for ZA +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_b' sets up a lazy save for ZA +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_c' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_multiple_paths' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save +entry: + br i1 %a, label %if.end, label %if.else + +if.else: + call void @private_za_callee_a() + br label %if.end + +if.end: + call void @private_za_callee_b() + ; The new lowering won't report this call as the save is already needed due to + ; the call to `private_za_callee_b()`. + call void @private_za_callee_c() + + ret void +} + +define void @test_lazy_save_with_zt0() "aarch64_inout_za" "aarch64_inout_zt0" +{ +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0' to 'private_za_callee' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_with_zt0' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save + call void @shared_za_callee() ; Save ZT0 (remark ZT0 spill) + call void @private_za_callee() ; Save ZA (remark ZA save) + ret void +} + +define void @test_lazy_save_with_zt0_reload() "aarch64_inout_za" "aarch64_inout_zt0" +{ +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0_reload' to 'private_za_callee' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_with_zt0_reload' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save + call void @shared_za_callee() ; Save ZT0 (remark ZT0 spill) + call void @shared_za_zt0_callee() ; Reload ZT0 + call void @private_za_callee() ; Save ZA, ZT0 (remark ZT0 spill and ZA save) + ret void +} + +define void @test_za_merge_paths(i1 %a) "aarch64_za_state_agnostic" { +;; Note: The old lowering does not emit any remarks for agnostic ZA saves. + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: full save of ZA emitted in 'test_za_merge_paths' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save +entry: + br i1 %a, label %if.end, label %if.else + +if.else: + call void @private_za_callee_a() + br label %exit + +if.end: + call void @private_za_callee_b() + br label %exit + +exit: + ; The new lowering won't report this call as the save is already needed due to + ; the call to `private_za_callee_*()` calls on both paths to this BB. + call void @private_za_callee_c() + + ret void +} + +define void @test_lazy_save_function_ptr_callee(ptr %private_za_callee) nounwind "aarch64_inout_za" { +; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_function_ptr_callee' to 'unknown callee' sets up a lazy save for ZA + +; CHECK-NEWLOWERING: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_function_ptr_callee' +; CHECK-NEWLOWERING-NEXT: remark: <unknown>:0:0: call requires ZA save + call void %private_za_callee() + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll deleted file mode 100644 index 65e50842d5d78..0000000000000 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll +++ /dev/null @@ -1,25 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s - -declare void @private_za_callee() -declare float @llvm.cos.f32(float) - -define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { -; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA - call void @private_za_callee() - ret void -} - -define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { -; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA - call void @private_za_callee() -; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA - call void @private_za_callee() - ret void -} - -define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" { -; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA - %res = call float @llvm.cos.f32(float %a) - ret float %res -} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
