jrbyrnes created this revision. jrbyrnes added reviewers: rampitec, arsenm. Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl. Herald added a project: All. jrbyrnes requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, wdng. Herald added projects: clang, LLVM.
Add builtins that accept floats for the v_permlane16_b32 and v_permlanex16_b32 instructions. A user has requested permlane builtins that take float operands directly, without the use of casts. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D147732 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td llvm/lib/Target/AMDGPU/VOP3Instructions.td
Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -663,7 +663,9 @@ let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>; -def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> { + + +class VOP3_PERMLANE_Profile<ValueType vt> : VOP3_Profile<VOPProfile <[vt, vt, i32, i32]>, VOP3_OPSEL> { let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, @@ -679,9 +681,9 @@ def gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">, GISDNodeXFormEquiv<opsel_i1timm>; -class PermlanePat<SDPatternOperator permlane, +class PermlanePat<ValueType vt, SDPatternOperator permlane, Instruction inst> : GCNPat< - (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, + (permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc), SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in) @@ -695,12 +697,17 @@ def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>; let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { - defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>; - defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>; + defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile<i32>>; + defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile<i32>>; + defm V_PERMLANE16_F32_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile<f32>>; + defm V_PERMLANEX16_F32_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile<f32>>; } // End $vdst = $vdst_in, DisableEncoding $vdst_in - def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64>; - 
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>; + def : PermlanePat<i32, int_amdgcn_permlane16, V_PERMLANE16_B32_e64>; + def : PermlanePat<i32, int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>; + def : PermlanePat<f32, int_amdgcn_permlane16_f32, V_PERMLANE16_F32_B32_e64>; + def : PermlanePat<f32, int_amdgcn_permlanex16_f32, V_PERMLANEX16_F32_B32_e64>; + defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>; defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>; Index: llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -303,6 +303,8 @@ def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>; def : SourceOfDivergence<int_amdgcn_permlane16>; def : SourceOfDivergence<int_amdgcn_permlanex16>; +def : SourceOfDivergence<int_amdgcn_permlane16_f32>; +def : SourceOfDivergence<int_amdgcn_permlanex16_f32>; def : SourceOfDivergence<int_amdgcn_mov_dpp>; def : SourceOfDivergence<int_amdgcn_mov_dpp8>; def : SourceOfDivergence<int_amdgcn_update_dpp>; Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2990,7 +2990,9 @@ applyDefaultMapping(OpdMapper); return; case Intrinsic::amdgcn_permlane16: - case Intrinsic::amdgcn_permlanex16: { + case Intrinsic::amdgcn_permlanex16: + case Intrinsic::amdgcn_permlane16_f32: + case Intrinsic::amdgcn_permlanex16_f32: { // Doing a waterfall loop over these wouldn't make any sense. 
substituteSimpleCopyRegs(OpdMapper, 2); substituteSimpleCopyRegs(OpdMapper, 3); @@ -4367,7 +4369,9 @@ break; } case Intrinsic::amdgcn_permlane16: - case Intrinsic::amdgcn_permlanex16: { + case Intrinsic::amdgcn_permlanex16: + case Intrinsic::amdgcn_permlane16_f32: + case Intrinsic::amdgcn_permlanex16_f32: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -892,7 +892,9 @@ return IC.replaceOperand(II, 0, UndefValue::get(Old->getType())); } case Intrinsic::amdgcn_permlane16: - case Intrinsic::amdgcn_permlanex16: { + case Intrinsic::amdgcn_permlanex16: + case Intrinsic::amdgcn_permlane16_f32: + case Intrinsic::amdgcn_permlanex16_f32: { // Discard vdst_in if it's not going to be read. 
Value *VDstIn = II.getArgOperand(0); if (isa<UndefValue>(VDstIn)) Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1950,6 +1950,24 @@ [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane16.f32 <old> <src0> <src1> <src2> <fi> <bound_control> +def int_amdgcn_permlane16_f32 : ClangBuiltin<"__builtin_amdgcn_permlane16_f32">, + Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlanex16.f32 <old> <src0> <src1> <src2> <fi> <bound_control> +def int_amdgcn_permlanex16_f32 : ClangBuiltin<"__builtin_amdgcn_permlanex16_f32">, + Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>; + + + + // llvm.amdgcn.mov.dpp8.i32 <src> <sel> // <sel> is a 32-bit constant whose high 8 bits must be zero which selects // the lanes to read from. 
Index: clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl =================================================================== --- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl +++ clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl @@ -13,6 +13,18 @@ *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}} } + +void test_permlane16_f32(global float* out, float a, float b, uint c, uint d, uint e) { + *out = __builtin_amdgcn_permlane16_f32(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlane16_f32' must be a constant integer}} + *out = __builtin_amdgcn_permlane16_f32(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlane16_f32' must be a constant integer}} +} + +void test_permlanex16_f32(global float* out, float a, float b, uint c, uint d, uint e) { + *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlanex16_f32' must be a constant integer}} + *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16_f32' must be a constant integer}} +} + + void test_mov_dpp8(global uint* out, uint a, uint b) { *out = __builtin_amdgcn_mov_dpp8(a, b); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}} } Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -18,6 +18,19 @@ *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0); } +// CHECK-LABEL: @test_permlane16_f32( +// CHECK: call float @llvm.amdgcn.permlane16.f32(float %a, float %b, i32 %c, i32 %d, i1 false, i1 false) +void test_permlane16_f32(global float* out, float a, float b, uint c, uint d) { + *out = 
__builtin_amdgcn_permlane16_f32(a, b, c, d, 0, 0); +} + +// CHECK-LABEL: @test_permlanex16_f32( +// CHECK: call float @llvm.amdgcn.permlanex16.f32(float %a, float %b, i32 %c, i32 %d, i1 false, i1 false) +void test_permlanex16_f32(global float* out, float a, float b, uint c, uint d) { + *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, 0, 0); +} + + // CHECK-LABEL: @test_mov_dpp8( // CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1) void test_mov_dpp8(global uint* out, uint a) { Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -255,6 +255,8 @@ //===----------------------------------------------------------------------===// TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlane16_f32, "fffUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlanex16_f32, "fffUiUiIbIb", "nc", "gfx10-insts") TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts") //===----------------------------------------------------------------------===//
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits