https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/129059
This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. >From 12e4b8a7d60c043ff5fd442bdf629288d720d271 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Thu, 27 Feb 2025 20:40:52 +0700 Subject: [PATCH] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++--- ...si-fold-operands-agpr-copy-reg-sequence.mir | 18 +++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6cb6863068b5f..eb9aabf8b6317 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); } - auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp) - .addReg(Vgpr); + Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); + BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr); B.addReg(Tmp); } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir index 95112826b7112..493138c933686 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -206,11 +206,11 @@ body: | ; CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 @@ -232,10 +232,10 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits