https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117284
From 1a4e01fad64f26e3213fda69d14ae6ae606c625b Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Thu, 7 Mar 2024 15:18:59 +0530 Subject: [PATCH] AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read srca/srcb hazard change for gfx950 Increase in wait states from 11 to 19. The index for smfmac is treated like srcA/srcB. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 6 +- .../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 60 ++++++++++++++----- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 4a4c9788b3d881..218f487f7e12ce 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2309,6 +2309,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; + const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; @@ -2414,7 +2415,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: - NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? 
GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates + : DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 1499fd4907a181..2ba873f55a1eb0 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -536,8 +536,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap body: | @@ -562,7 +566,7 @@ body: | # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 -# GFX950-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap body: | @@ -632,8 +636,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap body: | @@ -643,8 +651,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap body: | @@ -654,8 +666,13 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap body: | @@ -1452,8 +1469,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap body: | @@ -1473,8 +1494,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap body: | @@ -1504,8 +1530,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap body: | _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits