llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) <details> <summary>Changes</summary> Support true16 format for v_alignbyte_b32 in MC The operand type of v_alignbyte_b32 is `32_32_32_16`. For the last argument, in pre-GFX11 and post-GFX11(fake16) flow it takes 32bit input while in post-GFX11(true16) it takes16bits input. Update the operand type of `__builtin_amdgcn_alignbyte` so the last argument can be any interger type and use custom code to generate the right signature. --- Patch is 46.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119750.diff 17 Files Affected: - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+8) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+1-1) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2-2) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+6-2) - (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3.s (+7-4) - (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s (+30-12) - (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s (+13-4) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+3) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+3) - (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+3) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt (+14-2) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt (+26-5) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt (+14-2) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+14-2) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+30-6) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+18-3) - (modified) llvm/unittests/IR/IntrinsicsTest.cpp (-1) ``````````diff diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4d4b7428abd505..4ea8c5eea15769 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -20219,6 +20219,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } + case AMDGPU::BI__builtin_amdgcn_alignbyte: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); + return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_fence: { ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), AO, SSID); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b07e90a83e8613 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34f90b33bc4ba4..ba5da9000879ae 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile<VOP_I32_I32_I32_I32>, + VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, + VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1679,7 +1683,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index d46f010a2dafbd..1dbdb9ed5b6fda 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l +// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] -v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l +// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 // GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] @@ -494,6 +494,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1 v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null // GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h +// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + v_and_b16 v5.l, v1.l, v2.l // GFX11: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 36d959faa99840..64a244759870bd 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -363,22 +363,22 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 @@ -387,7 +387,7 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 @@ -405,6 +405,24 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX11: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 479bd19eaac896..87e141fa217bd0 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -187,11 +187,11 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] @@ -220,6 +220,15 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 62bebd00ee51f8..e3e12a14a8d989 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -452,6 +452,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1 v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null // GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h +// GFX12: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + v_and_b16 v5.l, v1.l, v2.l // GFX12: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index e7b12ec2deff5d..e6b40965e83c38 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -485,6 +485,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index b879e59b3608b4..143505ed81fceb 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -288,6 +288,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 857da7cd58cfb6..a756c6f4e76279 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -508,10 +508,16 @@ # GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] 0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01 # GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] @@ -540,6 +546,12 @@ 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf # GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x3... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/119750 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits