https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117417
OPSEL ASM Syntax for v_cvt_scalef32_pk_f32_fp4 : opsel:[x,y,z] where, x & y i.e. OPSEL[1 : 0] selects which src_byte to read. OPSEL ASM Syntax for v_cvt_scalef32_pk_fp4_f32 : opsel:[a,b,c,d] where, c & d i.e. OPSEL[3 : 2] selects which dst_byte to write. Co-authored-by: Pravin Jagtap <pravin.jag...@amd.com> >From f2ef9b191d96e9bf81d2ae251b62bc4ce89bd027 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap <pravin.jag...@amd.com> Date: Sun, 7 Apr 2024 04:30:50 -0400 Subject: [PATCH] AMDGPU: MC support for v_cvt_scale_fp4<->f32 of gfx950. OPSEL ASM Syntax for v_cvt_scalef32_pk_f32_fp4 : opsel:[x,y,z] where, x & y i.e. OPSEL[1 : 0] selects which src_byte to read. OPSEL ASM Syntax for v_cvt_scalef32_pk_fp4_f32 : opsel:[a,b,c,d] where, c & d i.e. OPSEL[3 : 2] selects which dst_byte to write. Co-authored-by: Pravin Jagtap <pravin.jag...@amd.com> --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 21 ++-- llvm/test/MC/AMDGPU/gfx950_asm_features.s | 96 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx950_err.s | 24 +++++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 72 ++++++++++++++ 6 files changed, 222 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2ebbd16aa65de6..64e88cf03b429b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -396,11 +396,17 @@ def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts", "Has bf8 conversion scale instructions" >; +def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts", + "HasFP4ConversionScaleInsts", + "true", + "Has fp4 conversion scale instructions" +>; + def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", "GFX950Insts", "true", "Additional instructions for GFX950+", - [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts] + [FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts] >; def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", @@ -1545,7 +1551,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureBF16ConversionInsts, FeatureBitOp3Insts, FeatureFP8ConversionScaleInsts, - FeatureBF8ConversionScaleInsts + FeatureBF8ConversionScaleInsts, + FeatureFP4ConversionScaleInsts ])>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -2425,6 +2432,9 @@ def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInst def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">, AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>; +def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">, + AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>; + def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ea00751cad1f24..20f573da0ec82b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -52,6 +52,7 @@ class AMDGPUSubtarget { bool HasTrue16BitInsts = false; bool HasFP8ConversionScaleInsts = false; bool HasBF8ConversionScaleInsts = false; + bool HasFP4ConversionScaleInsts = false; bool EnableRealTrue16Insts = false; bool HasBF16ConversionInsts = false; bool HasMadMixInsts = false; @@ -181,6 +182,8 @@ class AMDGPUSubtarget { bool hasBF8ConversionScaleInsts() const { return HasBF8ConversionScaleInsts; } + bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; } + bool hasMadMacF32Insts() const { return HasMadMacF32Insts || !isGCN(); } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 4425a26d06b12c..cc5255c1e6ef4d 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -887,7 +887,7 @@ class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPPro let HasOMod = 0; } -def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>, +def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, FP32InputMods:$src1_modifiers, Src1RC64:$src1, @@ -899,7 +899,7 @@ def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, let HasOMod = 0; } -def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>, +def VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>, VOP3_OPSEL> { let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, FP32InputMods:$src1_modifiers, Src1RC64:$src1, @@ -928,8 +928,8 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in { defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>; defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>; - defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>; - defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; } @@ -937,12 +937,17 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in { defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>; defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>; - defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>; - defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; + defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>; } +let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in { + defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>; + defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>; +} + let SubtargetPredicate = isGFX10Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -1881,3 +1886,7 @@ defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>; defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>; defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>; } +let OtherPredicates = [HasFP4ConversionScaleInsts] in { +defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>; +defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>; +} diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 8cdf5a0bfdad35..1c825c105eb215 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -693,3 +693,99 @@ v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00] v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00] +v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04] +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44] +v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] +v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] \ No newline at end of file diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 7db9c61197113d..56f4eff0a2793a 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -77,3 +77,27 @@ v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 mul:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand +v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index 757e533f77f789..1eac7628a22e7f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -467,3 +467,75 @@ # GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00] 0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x00,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x08,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[0,1,0] ; encoding: [0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x10,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, v3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x07,0x02,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], v2, s3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x07,0x00,0x00 + +# GFX950: v_cvt_scalef32_pk_f32_fp4 v[2:3], s2, 3 op_sel:[1,1,0] ; encoding: [0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00] +0x02,0x18,0x3f,0xd2,0x02,0x06,0x01,0x00 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x00,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x04,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x00,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x20,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,0] ; encoding: [0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x24,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,0] ; encoding: [0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x20,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x40,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x44,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x40,0x3d,0xd2,0x01,0x05,0x0c,0x02 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04] +0x01,0x60,0x3d,0xd2,0x01,0x05,0x0e,0x04 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1] ; encoding: [0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44] +0x01,0x64,0x3d,0xd2,0x01,0x05,0x0e,0x44 + +# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02] +0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits