jrbyrnes created this revision.
jrbyrnes added reviewers: rampitec, arsenm.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard,
yaxunl, jvesely, kzhuravl.
Herald added a project: All.
jrbyrnes requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wdng.
Herald added projects: clang, LLVM.
Add builtins (and matching intrinsics) which accept floats for the
v_permlane16_b32 and v_permlanex16_b32 instructions. A user has requested
permlane builtins that operate on floats without requiring casts.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D147732
Files:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -663,7 +663,9 @@
let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
-def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
+
+
+class VOP3_PERMLANE_Profile<ValueType vt> : VOP3_Profile<VOPProfile <[vt, vt, i32, i32]>, VOP3_OPSEL> {
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
IntOpSelMods:$src1_modifiers, SSrc_b32:$src1,
IntOpSelMods:$src2_modifiers, SSrc_b32:$src2,
@@ -679,9 +681,9 @@
def gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">,
GISDNodeXFormEquiv<opsel_i1timm>;
-class PermlanePat<SDPatternOperator permlane,
+class PermlanePat<ValueType vt, SDPatternOperator permlane,
Instruction inst> : GCNPat<
- (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
+ (permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2,
timm:$fi, timm:$bc),
(inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
@@ -695,12 +697,17 @@
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
- defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>;
- defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
+ defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile<i32>>;
+ defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile<i32>>;
+ defm V_PERMLANE16_F32_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile<f32>>;
+ defm V_PERMLANEX16_F32_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile<f32>>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
- def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64>;
- def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>;
+ def : PermlanePat<i32, int_amdgcn_permlane16, V_PERMLANE16_B32_e64>;
+ def : PermlanePat<i32, int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>;
+ def : PermlanePat<f32, int_amdgcn_permlane16_f32, V_PERMLANE16_F32_B32_e64>;
+ def : PermlanePat<f32, int_amdgcn_permlanex16_f32, V_PERMLANEX16_F32_B32_e64>;
+
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
Index: llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -303,6 +303,8 @@
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
def : SourceOfDivergence<int_amdgcn_permlane16>;
def : SourceOfDivergence<int_amdgcn_permlanex16>;
+def : SourceOfDivergence<int_amdgcn_permlane16_f32>;
+def : SourceOfDivergence<int_amdgcn_permlanex16_f32>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
def : SourceOfDivergence<int_amdgcn_update_dpp>;
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2990,7 +2990,9 @@
applyDefaultMapping(OpdMapper);
return;
case Intrinsic::amdgcn_permlane16:
- case Intrinsic::amdgcn_permlanex16: {
+ case Intrinsic::amdgcn_permlanex16:
+ case Intrinsic::amdgcn_permlane16_f32:
+ case Intrinsic::amdgcn_permlanex16_f32: {
// Doing a waterfall loop over these wouldn't make any sense.
substituteSimpleCopyRegs(OpdMapper, 2);
substituteSimpleCopyRegs(OpdMapper, 3);
@@ -4367,7 +4369,9 @@
break;
}
case Intrinsic::amdgcn_permlane16:
- case Intrinsic::amdgcn_permlanex16: {
+ case Intrinsic::amdgcn_permlanex16:
+ case Intrinsic::amdgcn_permlane16_f32:
+ case Intrinsic::amdgcn_permlanex16_f32: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -892,7 +892,9 @@
return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
}
case Intrinsic::amdgcn_permlane16:
- case Intrinsic::amdgcn_permlanex16: {
+ case Intrinsic::amdgcn_permlanex16:
+ case Intrinsic::amdgcn_permlane16_f32:
+ case Intrinsic::amdgcn_permlanex16_f32: {
// Discard vdst_in if it's not going to be read.
Value *VDstIn = II.getArgOperand(0);
if (isa<UndefValue>(VDstIn))
Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1950,6 +1950,24 @@
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlane16.f32 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlane16_f32 : ClangBuiltin<"__builtin_amdgcn_permlane16_f32">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
+
+// llvm.amdgcn.permlanex16.f32 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlanex16_f32 : ClangBuiltin<"__builtin_amdgcn_permlanex16_f32">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
+
+
+
+
// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
// <sel> is a 32-bit constant whose high 8 bits must be zero which selects
// the lanes to read from.
Index: clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
@@ -13,6 +13,18 @@
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}}
}
+
+void test_permlane16_f32(global float* out, float a, float b, uint c, uint d, uint e) {
+ *out = __builtin_amdgcn_permlane16_f32(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlane16_f32' must be a constant integer}}
+ *out = __builtin_amdgcn_permlane16_f32(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlane16_f32' must be a constant integer}}
+}
+
+void test_permlanex16_f32(global float* out, float a, float b, uint c, uint d, uint e) {
+ *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlanex16_f32' must be a constant integer}}
+ *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16_f32' must be a constant integer}}
+}
+
+
void test_mov_dpp8(global uint* out, uint a, uint b) {
*out = __builtin_amdgcn_mov_dpp8(a, b); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -18,6 +18,19 @@
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}
+// CHECK-LABEL: @test_permlane16_f32(
+// CHECK: call float @llvm.amdgcn.permlane16.f32(float %a, float %b, i32 %c, i32 %d, i1 false, i1 false)
+void test_permlane16_f32(global float* out, float a, float b, uint c, uint d) {
+ *out = __builtin_amdgcn_permlane16_f32(a, b, c, d, 0, 0);
+}
+
+// CHECK-LABEL: @test_permlanex16_f32(
+// CHECK: call float @llvm.amdgcn.permlanex16.f32(float %a, float %b, i32 %c, i32 %d, i1 false, i1 false)
+void test_permlanex16_f32(global float* out, float a, float b, uint c, uint d) {
+ *out = __builtin_amdgcn_permlanex16_f32(a, b, c, d, 0, 0);
+}
+
+
// CHECK-LABEL: @test_mov_dpp8(
// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
void test_mov_dpp8(global uint* out, uint a) {
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -255,6 +255,8 @@
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_permlane16_f32, "fffUiUiIbIb", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_permlanex16_f32, "fffUiUiIbIb", "nc", "gfx10-insts")
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
//===----------------------------------------------------------------------===//
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits