llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Read by sgemm/dgemm in srcc after v_mfma_f64_16x16x4_f64 increases from 9 to 17 wait states. --- Full diff: https://github.com/llvm/llvm-project/pull/117283.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+5-1) - (modified) llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir (+33-12) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index be0936ce74835f..4a4c9788b3d881 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2302,6 +2302,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9; const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17; const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9; + const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17; const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4; const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; @@ -2359,7 +2360,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: if (!isXDL(ST, *MI)) - NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates + : DMFMA16x16WritesVGPROverlappedSrcCWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index b9135dbd46fc1f..1499fd4907a181 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -298,8 +298,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap body: | @@ -319,8 +323,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap body: | @@ -549,8 +557,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap body: | @@ -1333,8 +1345,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -1354,8 +1370,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap body: | @@ -2502,8 +2523,8 @@ body: | ... # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GFX940: S_NOP 4 -# GFX950: S_NOP 5 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC_ name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | `````````` </details> https://github.com/llvm/llvm-project/pull/117283 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits