llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/116311.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+11-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h (+5) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+6) - (modified) llvm/test/MC/AMDGPU/gfx950_asm_vop1.s (+74-1) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt (+109-1) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 9ff51287eb19a4..d722cede255174 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -438,6 +438,12 @@ def FeatureRealTrue16Insts : SubtargetFeature<"real-true16", "Use true 16-bit registers" >; +def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts", + "HasBF16ConversionInsts", + "true", + "Has bf16 conversion instructions" +>; + def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", @@ -1502,7 +1508,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureCvtFP8VOP1Bug, FeatureGFX950Insts, FeatureAddressableLocalMemorySize163840, - FeaturePrngInst + FeaturePrngInst, + FeatureBF16ConversionInsts ])>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -2138,6 +2145,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && // FIXME When we default to RealTrue16 instead of Fake, change the line as follows. // AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>; +def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">, + AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>; + def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<(all_of FeatureVOP3P)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 334322f533e54a..ece26a4adb3754 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -51,6 +51,7 @@ class AMDGPUSubtarget { bool Has16BitInsts = false; bool HasTrue16BitInsts = false; bool EnableRealTrue16Insts = false; + bool HasBF16ConversionInsts = false; bool HasMadMixInsts = false; bool HasMadMacF32Insts = false; bool HasDsSrc2Insts = false; @@ -166,6 +167,10 @@ class AMDGPUSubtarget { // supported and the support for fake True16 instructions is removed. bool useRealTrue16Insts() const; + bool hasBF16ConversionInsts() const { + return HasBF16ConversionInsts; + } + bool hasMadMixInsts() const { return HasMadMixInsts; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 9f8e6a082d9652..71bd5ece32bc48 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2774,6 +2774,7 @@ def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>; def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>; def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>; def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>; +def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>; def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>; def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index e99f562688926d..1ff065f0629161 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -311,6 +311,9 @@ let OtherPredicates = [UseRealTrue16Insts] in let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>; +let SubtargetPredicate = HasBF16ConversionInsts in +defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>; + let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; @@ -1514,6 +1517,9 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; +let AssemblerPredicate = HasGFX950Insts in +defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>; + defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s index 0cb292ffe63dde..66dae85ee8e3e5 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s @@ -1,4 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s v_prng_b32 v5, v1 // GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] @@ -55,3 +56,75 @@ v_prng_b32 v5, src_scc v_prng_b32 v255, 0xaf123456 // GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] // GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 +// GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v127 +// GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, s1 +// GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, vcc_lo +// GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, vcc_hi +// GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, ttmp15 +// GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, m0 +// GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, exec_lo +// GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, exec_hi +// GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, -1 +// GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, 0.5 +// GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, src_scc +// GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v127, 0x8000 +// GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, -v1 +// GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, |v1| +// GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, -|v1| +// GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 clamp mul:2 +// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64 v5, v1 clamp div:2 +// GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt index 91ab05e99f1e7c..336a26907891a2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt @@ -40,4 +40,112 @@ 0xfd,0xb0,0x0a,0x7e # GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] -0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf \ No newline at end of file +0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf + +# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] +0x01,0xb7,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] +0x7f,0xb7,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] +0x01,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] +0x6a,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] +0x6b,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] +0x7b,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e] +0x7c,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] +0x7e,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] +0x7f,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] +0xc1,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] +0xf0,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] +0xfd,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00] +0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00 + +# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20] +0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20 + +# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00] +0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00 + +# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20] +0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20 + +# GFX950: v_cvt_f32_bf16_e64 v5, 0.5 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08] +0x05,0x80,0x9b,0xd1,0xf0,0x00,0x00,0x08 + +# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18] +0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18 + +# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18] +0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18 + +# GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] +0x01,0xb7,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] +0x7f,0xb7,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] +0x01,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] +0x6a,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] +0x6b,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] +0x7b,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e] +0x7c,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] +0x7e,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] +0x7f,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] +0xc1,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] +0xf0,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] +0xfd,0xb6,0x0a,0x7e + +# GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00] +0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00 + +# GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20] +0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20 + +# GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00] +0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00 + +# GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20] +0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20 + +# GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08] +0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08 `````````` </details> https://github.com/llvm/llvm-project/pull/116311 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits