llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> --- Patch is 20.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150628.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+4) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll (+48-114) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (+18-71) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index e5d1eaad2b8f4..b77da4d612dd4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1062,9 +1062,13 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat( switch (OpTy) { case AMDGPU::OPERAND_REG_INLINE_AC_INT32: case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0); break; case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1); break; default: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index adbc2df4b3474..7d5bc3f4c0c39 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -1083,58 +1083,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1(ptr addrspace(1) ; GFX90A-VGPR: ; %bb.0: ; %bb ; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], s[2:3] 
op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 1.0 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX90A-VGPR-NEXT: s_nop 7 ; GFX90A-VGPR-NEXT: s_nop 7 -; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX90A-VGPR-NEXT: s_nop 0 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX90A-VGPR-NEXT: s_endpgm ; ; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_1: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX942-VGPR-NEXT: 
v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 1.0 +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 7 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double 1.0), i32 0, i32 0, i32 0) @@ -1184,58 +1162,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1(ptr addrspace ; GFX90A-VGPR: ; %bb.0: ; %bb ; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[2:3], 
s[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], -1.0 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX90A-VGPR-NEXT: s_nop 7 ; GFX90A-VGPR-NEXT: s_nop 7 -; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX90A-VGPR-NEXT: s_nop 0 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX90A-VGPR-NEXT: s_endpgm ; ; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_neg1: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0xbff00000 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; 
GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], -1.0 +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 7 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double -1.0), i32 0, i32 0, i32 0) @@ -1285,58 +1241,36 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64(ptr addrspa ; GFX90A-VGPR: ; %bb.0: ; %bb ; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v0, 64 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-VGPR-NEXT: v_pk_mov_b32 
v[10:11], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] -; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1] ; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], 64 +; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3 +; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX90A-VGPR-NEXT: s_nop 7 ; GFX90A-VGPR-NEXT: s_nop 7 -; GFX90A-VGPR-NEXT: s_nop 1 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16 -; GFX90A-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1] +; GFX90A-VGPR-NEXT: s_nop 0 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX90A-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX90A-VGPR-NEXT: s_endpgm ; ; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; 
GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], 64 +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 7 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double bitcast (i64 64 to double)), i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index d7257a49b00bb..b4a6451908a6f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -3238,27 +3238,11 @@ define amdgpu_kernel void @test_mfma_i32_16x16x4i8_splatimm_src2_64(ptr addrspac ; ; GFX942-VGPR-LABEL: test_mfma_i32_16x16x4i8_splatimm_src2_64: ; GFX942-VGPR: 
; %bb.0: ; %bb -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v15, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v17, v18, v[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, 64 cbsz:1 abid:2 blgp:3 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 1 @@ -3463,13 +3447,10 @@ define amdgpu_kernel void @test_mfma_i32_4x4x4i8_splat_imm_src2_1(ptr addrspace( ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-VGPR-NEXT: s_nop 0 -; GFX942-VGPR-NEXT: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v5, v[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, 1 cbsz:1 abid:2 blgp:3 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 3 ; 
GFX942-VGPR-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] @@ -4483,13 +4464,10 @@ define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm_splat(ptr addrspace(1) %ar ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 2.0 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-VGPR-NEXT: s_nop 0 -; GFX942-VGPR-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v5, v[0:3] +; GFX942-VGPR-NEXT: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, 1.0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 2 ; GFX942-VGPR-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] @@ -4627,25 +4605,10 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) % ; GFX942-VGPR-LABEL: test_mfma_f32_16x16x1f32_imm_splat: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v15, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2.0 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v17, v[0:15] +; GFX942-VGPR-NEXT: 
v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, 1.0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 0 @@ -4797,36 +4760,20 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) % ; ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8f16_imm_splat: ; GFX942-VGPR: ; %bb.0: ; %bb -; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0x3c003c00 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v16 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 0x40004000 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0x3c003c00 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v15, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v18 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0x40004000 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v2 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v20, 0 -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[16:17], v[18:19], v[0:15] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], 1.0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 7 ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1] offset:48 -; GFX942-VGPR-NEXT: global_store_dwordx4 v20, v[8... 
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/150628
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits