https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117594
These instructions have non-standard use of OPSEL bits to select dest write byte. The src2_modifiers operand is used without having its corresponding src2 operand by introducing dummy src2. Co-authored-by: Pravin Jagtap <pravin.jag...@amd.com> >From a87b139e074e856cd0c61ef61e8f092feff6bff6 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap <pravin.jag...@amd.com> Date: Wed, 10 Apr 2024 05:47:54 -0400 Subject: [PATCH] AMDGPU: MC support for v_cvt_scalef32_pk_fp4_{f|bf}16 on gfx950. These instructions have non-standard use of OPSEL bits to select dest write byte. The src2_modifiers operand is used without having its corresponding src2 operand by introducing dummy src2. Co-authored-by: Pravin Jagtap <pravin.jag...@amd.com> --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 26 ++++++++++++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 40 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx950_err.s | 24 +++++++++++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 30 ++++++++++++++ 5 files changed, 123 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a1d45822837c5f..afd35842ba87f4 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8824,7 +8824,9 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; - if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || + if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi || + Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || + Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index fdffb2c36dcccf..7776688156419a 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -899,6 +899,23 @@ def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f3 let HasOMod = 0; } +def VOP3_CVT_SCALE_FP4_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f16, f32, f32]>, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let HasOpSel = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, HasOMod, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, @@ -965,6 +982,13 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>; defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>; + + // These instructions have non-standard use of op_sel. In particular they are + // using op_sel bits 2 and 3 while only having two sources. + let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>; + defm V_CVT_SCALEF32_PK_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_bf16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>; + } } let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in { @@ -1930,6 +1954,8 @@ defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>; defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>; defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>; defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>; +defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>; +defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>; } let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in { defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">; diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index e505b6ff4ad58b..12340dfaa78e91 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1025,3 +1025,43 @@ v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0] // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00] v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20] +v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: error: instruction not supported on this GPU +// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20] +v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 3163370f649100..e463c002e40801 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -293,3 +293,27 @@ v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 div:2 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp div:2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index 99b5f6f84ea94d..3f74743c38f67b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -713,3 +713,33 @@ # GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00] 0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20] +0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00] +0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00] +0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20] +0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits