https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117600
(no description provided) From f1278e9505bb92cbe1d108a8e745352e23dba3ef Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 22 May 2024 19:36:47 +0200 Subject: [PATCH] AMDGPU: Add encodings for minimum3/maximum3 f32 for gfx950 --- llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 3 ++ llvm/test/MC/AMDGPU/gfx950_asm_features.s | 33 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx950_err.s | 33 +++++++++++++++++++ .../Disassembler/AMDGPU/gfx950_dasm_vop3.txt | 24 ++++++++++++++ 5 files changed, 96 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b3c565da4a237..bda0b85f16a9c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -431,7 +431,9 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts, - FeatureF16BF16ToFP6BF6ConversionScaleInsts] + FeatureF16BF16ToFP6BF6ConversionScaleInsts, + FeatureMinimum3Maximum3F32 + ] >; def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 11700a4c34f9f3..5d4d56e8b0ad22 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1937,6 +1937,9 @@ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>; defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; +defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>; +defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>; + defm V_BITOP3_B16 : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">; defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">; let OtherPredicates = [HasFP8ConversionScaleInsts] in { diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 
5aa1e2d5ccb11d..68d93b4abf5a72 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1149,3 +1149,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7] buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04] +v_maximum3_f32 v1, v2, v3, v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4] +v_maximum3_f32 v1, -v2, -v3, -v4 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4] +v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04] +v_maximum3_f32 v1, 0.0, 1.0, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03] +v_maximum3_f32 v2, 0.0, v3, 1.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03] +v_maximum3_f32 v1, s8, v3, 1.0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04] +v_maximum3_f32 v1, v2, s8, v3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] +v_minimum3_f32 v0, v1, v2, v3 diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 
fd3da56c5130c4..03b651260b2886 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -353,3 +353,36 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum3_f16 v0, v1, v2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum3_f16 v0, v1, v2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum_f16 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum_f16 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_maximum_f32 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_minimum_f32 v0, v1, v2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, v3, s1, s2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_maximum3_f32 v0, s1, v3, s2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) +v_minimum3_f32 v0, s1, s2, v3 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported +v_minimum3_f32 v0, v1, v2, 0xdeadbeef diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt index ca8b1750a579e2..f7cb738375d224 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt @@ -857,3 +857,27 @@ # GFX950: 
v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02] 0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02 + +# GFX950: v_maximum3_f32 v1, -v2, -v3, -v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4] +0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0xe4 + +# GFX950: v_maximum3_f32 v1, -|v2|, -|v3|, -|v4| ; encoding: [0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4] +0x01,0x07,0xa9,0xd2,0x02,0x07,0x12,0xe4 + +# GFX950: v_maximum3_f32 v1, 0, 1.0, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04] +0x01,0x00,0xa9,0xd2,0x80,0xe4,0x0d,0x04 + +# GFX950: v_maximum3_f32 v1, s8, v3, 1.0 ; encoding: [0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03] +0x01,0x00,0xa9,0xd2,0x08,0x06,0xca,0x03 + +# GFX950: v_maximum3_f32 v1, v2, s8, v3 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04] +0x01,0x00,0xa9,0xd2,0x02,0x11,0x0c,0x04 + +# GFX950: v_maximum3_f32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04] +0x01,0x00,0xa9,0xd2,0x02,0x07,0x12,0x04 + +# GFX950: v_maximum3_f32 v2, 0, v3, 1.0 ; encoding: [0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03] +0x02,0x00,0xa9,0xd2,0x80,0x06,0xca,0x03 + +# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04] +0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits