https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117285
>From 608d98d7fc7c703d3bf7df5246393e1624d8662c Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Tue, 12 Mar 2024 13:29:05 +0530 Subject: [PATCH] AMDGPU: Refine gfx950 xdl-write-vgpr hazard cases The 2-pass XDL write VGPR, read by non-XDL SGEMM/DGEMM case was overly conservative by 1 wait state. Previously, for gfx940, the XDL/non-XDL cases happened to have the same number of cycles in all cases. Now the XDL consumer case has an additional wait state for 2-pass sources. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 22 +++++++++++++++---- .../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 15 +++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 218f487f7e12ce..8008b5f7bcc991 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2232,8 +2232,8 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses, - bool IsGFX950) { +GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses, + bool IsGFX950) { // xdl def cycles | gfx940 | gfx950 // 2 pass | 3 4 // 4 pass | 5 6 @@ -2242,6 +2242,17 @@ GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses, return NumPasses + 1 + IsGFX950; } +static int +GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 3 3 + // 4 pass | 5 6 + // 8 pass | 9 10 + // 16 pass | 17 18 + return NumPasses + 1 + (NumPasses != 2 && IsGFX950); +} + static int GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { // 2 pass -> 2 @@ -2379,8 +2390,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = isXDL(ST, *MI1) - ? 
GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( - NumPasses, ST.hasGFX950Insts()) + ? (isXDL(ST, *MI) + ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates( + NumPasses, ST.hasGFX950Insts()) + : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates( + NumPasses, ST.hasGFX950Insts())) : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 2ba873f55a1eb0..d59bcfb16eece2 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -156,8 +156,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap body: | @@ -348,8 +347,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -1403,8 +1401,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap body: | @@ -1885,8 +1882,7 @@ body: | ... 
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap body: | @@ -2220,8 +2216,7 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 -# GFX950-NEXT: S_NOP 3 +# GCN-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc body: | _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits