https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/139185
None >From 810682aeeaa305de0a36145770016f7364404908 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Thu, 8 May 2025 19:26:31 -0400 Subject: [PATCH] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` --- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 ++++++++++---- .../inst-select-amdgcn.fcmp.constants.w32.mir | 8 ++++---- .../inst-select-amdgcn.fcmp.constants.w64.mir | 8 ++++---- .../AMDGPU/GlobalISel/inst-select-fptosi.mir | 12 ++++++------ .../AMDGPU/GlobalISel/inst-select-fptoui.mir | 12 ++++++------ .../AMDGPU/GlobalISel/inst-select-sitofp.mir | 4 ++-- .../AMDGPU/GlobalISel/inst-select-uitofp.mir | 4 ++-- .../AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt | 4 ++-- .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 4 ++-- .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 4 ++-- .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 4 ++-- .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt | 4 ++-- .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 4 ++-- 13 files changed, 46 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 352a3f9c2d27f..5522d89855332 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -261,6 +261,11 @@ foreach vt = Reg32Types.types in { >; } +let HasOpSel = 1 in { + def VOP_F16_F32_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F16_F32>; + def VOP_F32_F16_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F32_F16>; +} // End HasOpSel = 1 + let isReMaterializable = 1 in { let SchedRW = [WriteDoubleCvt] in { // OMod clears exceptions when set in this instruction @@ -301,15 +306,16 @@ let FPDPRounding = 1, isReMaterializable = 0 in { defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>; let OtherPredicates = [UseRealTrue16Insts] in defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>; - let OtherPredicates = [UseFakeTrue16Insts] in - defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>; + let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F16_F32_fake16 + : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL, + any_fpround>; } // End FPDPRounding = 1, isReMaterializable = 0 let OtherPredicates = [NotHasTrue16BitInsts] in defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>; let OtherPredicates = [UseRealTrue16Insts] in defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>; -let OtherPredicates = [UseFakeTrue16Insts] in - defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>; +let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16 + : VOP1Inst<"v_cvt_f32_f16_fake16", VOP_F32_F16_Fake16_OP_SEL, any_fpextend>; let SubtargetPredicate = HasBF16ConversionInsts in defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir index 49383135ab0c5..66c8d11bfcc9e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir @@ -26,8 +26,8 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -62,8 +62,8 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir index 828eb5d3fb40a..c7715eec76d37 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir @@ -26,8 +26,8 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 @@ -62,8 +62,8 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index 03cb907f82a16..2c7eb23dab364 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -149,7 +149,7 @@ body: | ; GFX11-FAKE16: liveins: $vgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -196,7 +196,7 @@ body: | ; GFX11-FAKE16: liveins: $sgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 @@ -251,7 +251,7 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -301,7 +301,7 @@ body: | ; GFX11-FAKE16: liveins: $vgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -350,7 +350,7 @@ body: | ; GFX11-FAKE16: liveins: $sgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 @@ -407,7 +407,7 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index 521a0e8a2a796..489a6b360124f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -99,7 +99,7 @@ body: | ; GFX11-FAKE16: liveins: $vgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -146,7 +146,7 @@ body: | ; GFX11-FAKE16: liveins: $sgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 @@ -201,7 +201,7 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -251,7 +251,7 @@ body: | ; GFX11-FAKE16: liveins: $vgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 @@ -300,7 +300,7 @@ body: | ; GFX11-FAKE16: liveins: $sgpr0 ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 @@ -357,7 +357,7 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index 3888ce87b46fd..72e8fc52917a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -101,7 +101,7 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 @@ -150,7 +150,7 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir index 35d622dc57d18..1e3c4c8e596b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -115,7 +115,7 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 @@ -164,7 +164,7 @@ body: | ; GFX11-FAKE16-NEXT: {{ $}} ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt index 282ff229c57e6..4f1b40f0989ff 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -448,7 +448,7 @@ 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -644,7 +644,7 @@ 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt index 5995762ce6ff1..228cb94582473 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -124,7 +124,7 @@ 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -200,7 +200,7 @@ 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt index d7e73909286a2..a1b2ac8477783 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt @@ -504,7 +504,7 @@ 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf # GFX11-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 # GFX11-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] @@ -699,7 +699,7 @@ 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00 # GFX11-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00] -# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 # GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt index 3ccf6feac4cca..8cc1d769b5307 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt @@ -556,7 +556,7 @@ 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf # GFX12-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 # GFX12-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] @@ -751,7 +751,7 @@ 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00 # GFX12-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00] -# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 # GFX12: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt index a020b0ae46a37..8982c399c6af7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt @@ -478,7 +478,7 @@ 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -674,7 +674,7 @@ 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt index ad3c673b4e390..81565cae6ea04 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt @@ -154,7 +154,7 @@ 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -230,7 +230,7 @@ 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits