llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> The case of a 2-pass XDL writing a VGPR that is then read by a non-XDL SGEMM/DGEMM was overly conservative by 1 wait state. Previously, for gfx940, the XDL and non-XDL consumer cases happened to require the same number of cycles for every pass count. Now, on gfx950, the XDL consumer case requires one additional wait state for 2-pass sources. --- Full diff: https://github.com/llvm/llvm-project/pull/117285.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+18-4) - (modified) llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir (+5-10) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 218f487f7e12ce..8008b5f7bcc991 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2232,8 +2232,8 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses, - bool IsGFX950) { +GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, + bool IsGFX950) { // xdl def cycles | gfx940 | gfx950 // 2 pass | 3 4 // 4 pass | 5 6 @@ -2242,6 +2242,17 @@ GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses, return NumPasses + 1 + IsGFX950; } +static int +GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 3 3 + // 4 pass | 5 6 + // 8 pass | 9 10 + // 16 pass | 17 18 + return NumPasses + 1 + (NumPasses != 2 && IsGFX950); +} + static int GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { // 2 pass -> 2 @@ -2379,8 +2390,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = isXDL(ST, *MI1) - ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( - NumPasses, ST.hasGFX950Insts()) + ? 
(isXDL(ST, *MI) + ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( + NumPasses, ST.hasGFX950Insts()) + : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( + NumPasses, ST.hasGFX950Insts())) : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 2ba873f55a1eb0..d59bcfb16eece2 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -156,8 +156,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap body: | @@ -348,8 +347,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -1403,8 +1401,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap body: | @@ -1885,8 +1882,7 @@ body: | ... 
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap body: | @@ -2220,8 +2216,7 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc body: | `````````` </details> https://github.com/llvm/llvm-project/pull/117285 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits