https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/146305
>From 165b8e3d9d18350e2d976aac8614f093f6189a6a Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Sun, 29 Jun 2025 23:47:02 -0400 Subject: [PATCH] [AMDGPU] Add support for `v_cvt_f16_bf8` on gfx1250 Co-authored-by: Mekhanoshin, Stanislav <stanislav.mekhanos...@amd.com> --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 + .../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 38 +++++++++++ .../builtins-amdgcn-error-gfx1250-param.cl | 4 ++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 6 ++ .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + llvm/lib/Target/AMDGPU/VOP1Instructions.td | 5 ++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 12 ++++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 15 +++++ .../MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s | 8 +++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s | 12 ++++ .../MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s | 8 +++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s | 12 ++++ .../gfx1250_asm_vop3_from_vop1-fake16.s | 27 ++++++++ .../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 27 ++++++++ .../gfx1250_asm_vop3_from_vop1_dpp16-fake16.s | 20 ++++++ .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s | 24 +++++++ .../gfx1250_asm_vop3_from_vop1_dpp8-fake16.s | 28 ++++++++ .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s | 32 ++++++++++ .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 19 ++++++ .../AMDGPU/gfx1250_dasm_vop1_dpp16.txt | 11 ++++ .../AMDGPU/gfx1250_dasm_vop1_dpp8.txt | 11 ++++ .../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 64 ++++++++----------- .../gfx1250_dasm_vop3_from_vop1_dpp16.txt | 24 +++++++ .../gfx1250_dasm_vop3_from_vop1_dpp8.txt | 28 ++++++++ 24 files changed, 400 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index aba52421f4338..0104482e9117f 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -657,6 +657,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-in TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 17fdc25f4a57d..4d4afedae3658 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -62,6 +62,44 @@ void test_cvt_f16_fp8(global half* out, int a) out[3] = __builtin_amdgcn_cvt_f16_fp8(a, 3); } +// CHECK-LABEL: @test_cvt_f16_bf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP0]], i32 0) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-NEXT: store half [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP3]], i32 1) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP5]], i64 1 +// CHECK-NEXT: store half [[TMP4]], ptr addrspace(1) [[ARRAYIDX1]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP6]], i32 2) +// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP8]], i64 2 +// CHECK-NEXT: store half [[TMP7]], ptr addrspace(1) [[ARRAYIDX2]], align 2 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.amdgcn.cvt.f16.bf8(i32 [[TMP9]], i32 3) +// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP11]], i64 3 +// CHECK-NEXT: store half [[TMP10]], ptr addrspace(1) [[ARRAYIDX3]], align 2 +// CHECK-NEXT: ret void +// +void test_cvt_f16_bf8(global half* out, int a) +{ + out[0] = __builtin_amdgcn_cvt_f16_bf8(a, 0); + out[1] = __builtin_amdgcn_cvt_f16_bf8(a, 1); + out[2] = __builtin_amdgcn_cvt_f16_bf8(a, 2); + out[3] = __builtin_amdgcn_cvt_f16_bf8(a, 3); +} + // CHECK-LABEL: @test_cvt_pk_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl index 1e98869caeb80..69857087bae08 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl @@ -12,3 +12,7 @@ void test_s_monitor_sleep(short a) { void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) { __builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}} } + +void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) { + __builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 0bfd87c061fba..c09c3630719f4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3515,6 +3515,12 @@ def int_amdgcn_cvt_f16_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_fp8">, [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; +// llvm.amdgcn.cvt.f16.bf8 half vdst, int srcA, imm byte_sel [0..3] +def int_amdgcn_cvt_f16_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f16_bf8">, + DefaultAttrsIntrinsic<[llvm_half_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 2cf9c73e3ec81..778d257c88a38 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4597,6 +4597,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_sr_bf16_f32: case Intrinsic::amdgcn_cvt_sr_f16_f32: case Intrinsic::amdgcn_cvt_f16_fp8: + case Intrinsic::amdgcn_cvt_f16_bf8: case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_f16: case Intrinsic::amdgcn_cvt_scalef32_pk32_bf6_f16: case Intrinsic::amdgcn_cvt_scalef32_pk32_fp6_bf16: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 55e7eb15bd5a0..cf02c5b2454b3 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -747,6 +747,8 @@ let SubtargetPredicate = isGFX1250Plus in { let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { defm V_CVT_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_fp8", V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>; + defm V_CVT_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_f16_bf8", + V_CVT_F16_F8_Profile, V_CVT_F16_F8_True16_Profile, V_CVT_F16_F8_Fake16_Profile>; defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8", VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16, int_amdgcn_cvt_pk_f16_fp8>; @@ -757,9 +759,11 @@ let SubtargetPredicate = isGFX1250Plus in { let True16Predicate = UseRealTrue16Insts in { def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_t16_e64, 1>; + def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_t16_e64, 1>; } let True16Predicate = UseFakeTrue16Insts in { def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>; + def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>; } } // End SubtargetPredicate = isGFX1250Plus @@ -1099,6 +1103,7 @@ defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_c defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>; defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>; +defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index d3b9d403e5088..7b07c84d56680 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -46,6 +46,18 @@ v_cvt_f32_bf16 v5, src_scc v_cvt_f32_bf16 v127, 0x8000 // GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] +v_cvt_f16_bf8 v1, v2 +// GFX1250: v_cvt_f16_bf8_e32 v1, v2 ; encoding: [0x02,0xf1,0x02,0x7e] + +v_cvt_f16_bf8 v1, s2 +// GFX1250: v_cvt_f16_bf8_e32 v1, s2 ; encoding: [0x02,0xf0,0x02,0x7e] + +v_cvt_f16_bf8 v1, 2 +// GFX1250: v_cvt_f16_bf8_e32 v1, 2 ; encoding: [0x82,0xf0,0x02,0x7e] + +v_cvt_f16_bf8 v1, 0x1234 +// GFX1250: v_cvt_f16_bf8_e32 v1, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00] + v_cvt_f16_fp8 v1, v2 // GFX1250: v_cvt_f16_fp8_e32 v1, v2 ; encoding: [0x02,0xef,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index dd070651e58ca..30c62c957874d 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -49,6 +49,21 @@ v_cvt_f32_bf16 v127, 0x8000 v_cvt_f32_bf16 v5, v1.h // GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] +v_cvt_f16_bf8 v1.l, v2 +// GFX1250: v_cvt_f16_bf8_e32 v1.l, v2 ; encoding: [0x02,0xf1,0x02,0x7e] + +v_cvt_f16_bf8 v1.l, s2 +// GFX1250: v_cvt_f16_bf8_e32 v1.l, s2 ; encoding: [0x02,0xf0,0x02,0x7e] + +v_cvt_f16_bf8 v1.l, 2 +// GFX1250: v_cvt_f16_bf8_e32 v1.l, 2 ; encoding: [0x82,0xf0,0x02,0x7e] + +v_cvt_f16_bf8 v1.l, 0x1234 +// GFX1250: v_cvt_f16_bf8_e32 v1.l, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00] + +v_cvt_f16_bf8 v1.h, v2 +// GFX1250: v_cvt_f16_bf8_e32 v1.h, v2 ; encoding: [0x02,0xf1,0x02,0x7f] + v_cvt_f16_fp8 v1.l, v2 // GFX1250: v_cvt_f16_fp8_e32 v1.l, v2 ; encoding: [0x02,0xef,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index f2751b7aecb49..e53812bb3fd04 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -58,6 +58,14 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl: // GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f16_fp8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 525963a8c5ba5..bd767d14fab5f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -62,6 +62,18 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.h, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7f,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f16_fp8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index 1182f4279e159..cbc0ebd3edda0 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -14,6 +14,14 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s index 14291a3dea5e1..8b9980a31daf3 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -18,6 +18,18 @@ v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 44e0e3efd965f..b333541a0f573 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -76,6 +76,33 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1] v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] // GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00] +v_cvt_f16_bf8 v150, v2 +// GFX1250: v_cvt_f16_bf8_e64 v150, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v150, s2 +// GFX1250: v_cvt_f16_bf8_e64 v150, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_f16_bf8 v150, 2 +// GFX1250: v_cvt_f16_bf8_e64 v150, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00] + +v_cvt_f16_bf8 v150, 0x1234 +// GFX1250: v_cvt_f16_bf8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +v_cvt_f16_bf8 v1, v2 byte_sel:2 +// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1, v2 byte_sel:1 +// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1, v2 byte_sel:3 +// GFX1250: v_cvt_f16_bf8_e64 v1, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v128, v2 op_sel:[0,1] +// GFX1250: v_cvt_f16_bf8_e64 v128, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:2 +// GFX1250: v_cvt_f16_bf8_e64 v1, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00] + v_cvt_f16_fp8 v150, v2 // GFX1250: v_cvt_f16_fp8_e64 v150, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index 5546841e9154b..df595fe562e0e 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -79,6 +79,33 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] v_cvt_f32_bf16_e64 v5, v128.h // GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00] +v_cvt_f16_bf8 v150.l, v2 +// GFX1250: v_cvt_f16_bf8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v150.l, s2 +// GFX1250: v_cvt_f16_bf8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_f16_bf8 v150.l, 2 +// GFX1250: v_cvt_f16_bf8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00] + +v_cvt_f16_bf8 v150.l, 0x1234 +// GFX1250: v_cvt_f16_bf8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] + +v_cvt_f16_bf8 v1.l, v2 byte_sel:2 +// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1.l, v2 byte_sel:1 +// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1.l, v2 byte_sel:3 +// GFX1250: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v128.h, v2 +// GFX1250: v_cvt_f16_bf8_e64 v128.h, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_f16_bf8 v1.h, v2 byte_sel:2 +// GFX1250: v_cvt_f16_bf8_e64 v1.h, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00] + v_cvt_f16_fp8 v150.l, v2 // GFX1250: v_cvt_f16_fp8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf7,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s index 8f2bd6b9ddb77..b4000ce9425fe 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -46,6 +46,26 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f16_fp8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s index 9f2cc29dcd0b5..82d8245c86249 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -50,6 +50,30 @@ v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_f16_bf8 v1.l, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f16_bf8_e64_dpp v150.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.h, v2 byte_sel:3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.h, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v128.l, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f16_bf8_e64_dpp v128.l, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x80,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v1.l, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf // GFX1250: v_cvt_f16_fp8_e64_dpp v1.l, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s index 608e07b9f0f5c..8e0bdfeb78853 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -2,6 +2,34 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_cvt_f16_bf8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x18,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f16_bf8_e64_dpp v150, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x18,0xf8,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1, v2 op_sel:[0,1] byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x58,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v150, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f16_fp8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf7,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s index 94e1d375630e5..eaf63eabea422 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -2,6 +2,38 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_cvt_f16_bf8 v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x18,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.l, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v150.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f16_bf8_e64_dpp v150.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x18,0xf8,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v1.h, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v1.h, v2 op_sel:[0,1] byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x58,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f16_bf8 v128.l, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f16_bf8_e64_dpp v128.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x80,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f16_fp8 v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_f16_fp8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf7,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index 9518a48120970..622c57a20f860 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -50,6 +50,25 @@ 0x81,0xe5,0x0a,0x7e # GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] +0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.l, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e32 v1, 0x1234 ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00] + +0x82,0xf0,0x02,0x7e +# GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.l, 2 ; encoding: [0x82,0xf0,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_f16_bf8_e32 v1, 2 ; encoding: [0x82,0xf0,0x02,0x7e] + +0x02,0xf0,0x02,0x7e +# GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.l, s2 ; encoding: [0x02,0xf0,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_f16_bf8_e32 v1, s2 ; encoding: [0x02,0xf0,0x02,0x7e] + +0x02,0xf1,0x02,0x7e +# GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.l, v2 ; encoding: [0x02,0xf1,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_f16_bf8_e32 v1, v2 ; encoding: [0x02,0xf1,0x02,0x7e] + +0x02,0xf1,0x02,0x7f +# GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.h, v2 ; encoding: [0x02,0xf1,0x02,0x7f] + 0xff,0xee,0x02,0x7e,0x34,0x12,0x00,0x00 # GFX1250-REAL16: v_cvt_f16_fp8_e32 v1.l, 0x1234 ; encoding: [0xff,0xee,0x02,0x7e,0x34,0x12,0x00,0x00] # GFX1250-FAKE16: v_cvt_f16_fp8_e32 v1, 0x1234 ; encoding: [0xff,0xee,0x02,0x7e,0x34,0x12,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt index 16bdfc0b3fdfd..149a054742ded 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -47,6 +47,17 @@ 0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff # GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] +0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff] + +0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x04,0xff] + +0xfa,0xf0,0x02,0x7f,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.h, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7f,0x02,0xe4,0x00,0xff] + 0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff # GFX1250-REAL16: v_cvt_f16_fp8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff] # GFX1250-FAKE16: v_cvt_f16_fp8_dpp v1, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xee,0x02,0x7e,0x02,0x39,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index 694417f233ae9..274b58769911a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -14,6 +14,17 @@ 0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05 # GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] +0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05] + +0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] + 0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f16_fp8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7e,0x02,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index fc544304bf023..b09ddc20b8034 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -2,51 +2,41 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s -0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00] +0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v1, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf8,0xd5,0x02,0x01,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00] +0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v1, v2 byte_sel:2 ; encoding: [0x01,0x08,0xf8,0xd5,0x02,0x01,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00] +0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v1.l, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v1, v2 byte_sel:3 ; encoding: [0x01,0x18,0xf8,0xd5,0x02,0x01,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00] +0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v150.l, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v150, 0x1234 ; encoding: [0x96,0x00,0xf8,0xd5,0xff,0x00,0x00,0x00,0x34,0x12,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, null ; encoding: [0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00] +0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v150.l, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v150, 2 ; encoding: [0x96,0x00,0xf8,0xd5,0x82,0x00,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00] +0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v150.l, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v150, s2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00] +0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v150.l, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v150, v2 ; encoding: [0x96,0x00,0xf8,0xd5,0x02,0x01,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00] +0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v128.h, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v128, v2 op_sel:[0,1] ; encoding: [0x80,0x40,0xf8,0xd5,0x02,0x01,0x00,0x00] -0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00 -# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v1.l ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] -# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] - -0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00 -# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v255.l ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] -# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] - -0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00] - -0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00 -# GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00] - -0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00 -# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v1.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] -# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] - -0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00 -# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] -# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] +0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f16_bf8_e64 v1.h, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64 v1, v2 op_sel:[0,1] byte_sel:2 ; encoding: [0x01,0x48,0xf8,0xd5,0x02,0x01,0x00,0x00] 0x01,0x10,0xf7,0xd5,0x02,0x01,0x00,0x00 # GFX1250-REAL16: v_cvt_f16_fp8_e64 v1.l, v2 byte_sel:1 ; encoding: [0x01,0x10,0xf7,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt index 57135e8356bfd..dedb25599eea8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -50,6 +50,30 @@ # GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] # GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x18,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v150.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v150, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x96,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] + +0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.h, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x58,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] + +0x80,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v128.l, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x80,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v128, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x80,0x00,0xf8,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x00,0xff] + 0x01,0x10,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff # GFX1250-REAL16: v_cvt_f16_fp8_e64_dpp v1.l, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] # GFX1250-FAKE16: v_cvt_f16_fp8_e64_dpp v1, v2 byte_sel:1 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x10,0xf7,0xd5,0xfa,0x00,0x00,0x00,0x02,0x39,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt index 9cd2bd088f889..3868abe246e5d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -10,6 +10,34 @@ # GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +0x01,0x10,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x08,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x18,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x18,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x18,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x18,0xf8,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v150.l, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x18,0xf8,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v150, v2 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x96,0x18,0xf8,0xd5,0xea,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x01,0x58,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v1.h, v2 op_sel:[0,1] byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x58,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v1, v2 op_sel:[0,1] byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x58,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x80,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v128.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x80,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x80,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + +0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f16_bf8_e64_dpp v150.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f16_bf8_e64_dpp v150, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x96,0x00,0xf8,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] + 0x01,0x10,0xf7,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f16_fp8_e64_dpp v1.l, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf7,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] # GFX1250-FAKE16: v_cvt_f16_fp8_e64_dpp v1, v2 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x10,0xf7,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits