llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

When a loop contains a VMEM load whose result is only used outside the
loop, do not bother to flush vmcnt in the loop head on GFX12. A wait for
vmcnt will be required inside the loop anyway, because VMEM instructions
can write their VGPR results out of order.

---
Full diff: https://github.com/llvm/llvm-project/pull/105550.diff

2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir (+5-5)

``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 4262e7b5d9c25..eafe20be17d5b 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2390,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   }
   if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
     return true;
-  return HasVMemLoad && UsesVgprLoadedOutside;
+  return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder();
 }
 
 bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index bdef55ab956a0..0ddd2aa285b26 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -295,7 +295,7 @@ body: |
 # GFX12-LABEL: waitcnt_vm_loop2
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
@@ -342,7 +342,7 @@ body: |
 # GFX12-LABEL: waitcnt_vm_loop2_store
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
@@ -499,9 +499,9 @@ body: |
 # GFX12-LABEL: waitcnt_vm_loop2_reginterval
 # GFX12-LABEL: bb.0:
 # GFX12: GLOBAL_LOAD_DWORDX4
-# GFX12: S_WAIT_LOADCNT 0
-# GFX12-LABEL: bb.1:
 # GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
 name: waitcnt_vm_loop2_reginterval
 body: |
@@ -600,7 +600,7 @@ body: |
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
``````````

</details>

https://github.com/llvm/llvm-project/pull/105550
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits