amyk created this revision.
amyk added reviewers: power-llvm-team, PowerPC, lei, stefanp.
amyk added projects: PowerPC, LLVM, clang.
Herald added subscribers: shchenz, hiraditya, nemanjai.
This patch implements the following function prototypes:
vector unsigned char vec_genbm (unsigned long long bm);
vector unsigned char vec_genbm (const unsigned long long bm);
vector unsigned short vec_genhm (unsigned long long bm);
vector unsigned int vec_genwm (unsigned long long bm);
vector unsigned long long vec_gendm (unsigned long long bm);
vector unsigned __int128 vec_genqm (unsigned long long bm);
Depends on D82675 <https://reviews.llvm.org/D82675>.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D82725
Files:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
llvm/test/MC/Disassembler/PowerPC/p10insts.txt
llvm/test/MC/PowerPC/p10.s
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -18,6 +18,24 @@
# CHECK-BE: vextractqm 1, 2 # encoding: [0x10,0x2c,0x16,0x42]
# CHECK-LE: vextractqm 1, 2 # encoding: [0x42,0x16,0x2c,0x10]
vextractqm 1, 2
+# CHECK-BE: mtvsrbm 1, 2 # encoding: [0x10,0x30,0x16,0x42]
+# CHECK-LE: mtvsrbm 1, 2 # encoding: [0x42,0x16,0x30,0x10]
+ mtvsrbm 1, 2
+# CHECK-BE: mtvsrhm 1, 2 # encoding: [0x10,0x31,0x16,0x42]
+# CHECK-LE: mtvsrhm 1, 2 # encoding: [0x42,0x16,0x31,0x10]
+ mtvsrhm 1, 2
+# CHECK-BE: mtvsrwm 1, 2 # encoding: [0x10,0x32,0x16,0x42]
+# CHECK-LE: mtvsrwm 1, 2 # encoding: [0x42,0x16,0x32,0x10]
+ mtvsrwm 1, 2
+# CHECK-BE: mtvsrdm 1, 2 # encoding: [0x10,0x33,0x16,0x42]
+# CHECK-LE: mtvsrdm 1, 2 # encoding: [0x42,0x16,0x33,0x10]
+ mtvsrdm 1, 2
+# CHECK-BE: mtvsrqm 1, 2 # encoding: [0x10,0x34,0x16,0x42]
+# CHECK-LE: mtvsrqm 1, 2 # encoding: [0x42,0x16,0x34,0x10]
+ mtvsrqm 1, 2
+# CHECK-BE: mtvsrbmi 1, 31 # encoding: [0x10,0x2f,0x00,0x15]
+# CHECK-LE: mtvsrbmi 1, 31 # encoding: [0x15,0x00,0x2f,0x10]
+ mtvsrbmi 1, 31
# CHECK-BE: vpdepd 1, 2, 0 # encoding: [0x10,0x22,0x05,0xcd]
# CHECK-LE: vpdepd 1, 2, 0 # encoding: [0xcd,0x05,0x22,0x10]
vpdepd 1, 2, 0
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -16,6 +16,24 @@
# CHECK: vextractqm 1, 2
0x10 0x2c 0x16 0x42
+# CHECK: mtvsrbm 1, 2
+0x10 0x30 0x16 0x42
+
+# CHECK: mtvsrhm 1, 2
+0x10 0x31 0x16 0x42
+
+# CHECK: mtvsrwm 1, 2
+0x10 0x32 0x16 0x42
+
+# CHECK: mtvsrdm 1, 2
+0x10 0x33 0x16 0x42
+
+# CHECK: mtvsrqm 1, 2
+0x10 0x34 0x16 0x42
+
+# CHECK: mtvsrbmi 1, 65535
+0x10 0x3f 0xff 0xd5
+
# CHECK: vpdepd 1, 2, 0
0x10 0x22 0x05 0xcd
Index: llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -61,3 +61,99 @@
declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
+
+define <16 x i8> @test_mtvsrbm(i64 %a) {
+; CHECK-LABEL: test_mtvsrbm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrbm v2, r3
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 %a)
+ ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi() {
+; CHECK-LABEL: test_mtvsrbmi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrbmi v2, 1
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 1)
+ ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi2() {
+; CHECK-LABEL: test_mtvsrbmi2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrbmi v2, 255
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 255)
+ ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi3() {
+; CHECK-LABEL: test_mtvsrbmi3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrbmi v2, 0
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 256)
+ ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi4() {
+; CHECK-LABEL: test_mtvsrbmi4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrbmi v2, 10
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 266)
+ ret <16 x i8> %mv
+}
+
+define <8 x i16> @test_mtvsrhm(i64 %a) {
+; CHECK-LABEL: test_mtvsrhm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrhm v2, r3
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <8 x i16> @llvm.ppc.altivec.mtvsrhm(i64 %a)
+ ret <8 x i16> %mv
+}
+
+define <4 x i32> @test_mtvsrwm(i64 %a) {
+; CHECK-LABEL: test_mtvsrwm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrwm v2, r3
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <4 x i32> @llvm.ppc.altivec.mtvsrwm(i64 %a)
+ ret <4 x i32> %mv
+}
+
+define <2 x i64> @test_mtvsrdm(i64 %a) {
+; CHECK-LABEL: test_mtvsrdm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrdm v2, r3
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <2 x i64> @llvm.ppc.altivec.mtvsrdm(i64 %a)
+ ret <2 x i64> %mv
+}
+
+define <1 x i128> @test_mtvsrqm(i64 %a) {
+; CHECK-LABEL: test_mtvsrqm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mtvsrqm v2, r3
+; CHECK-NEXT: blr
+entry:
+ %mv = tail call <1 x i128> @llvm.ppc.altivec.mtvsrqm(i64 %a)
+ ret <1 x i128> %mv
+}
+
+declare <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64)
+declare <8 x i16> @llvm.ppc.altivec.mtvsrhm(i64)
+declare <4 x i32> @llvm.ppc.altivec.mtvsrwm(i64)
+declare <2 x i64> @llvm.ppc.altivec.mtvsrdm(i64)
+declare <1 x i128> @llvm.ppc.altivec.mtvsrqm(i64)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -586,6 +586,30 @@
"vextractqm $rD, $vB", IIC_VecGeneral,
[(set i32:$rD,
(int_ppc_altivec_vextractqm v1i128:$vB))]>;
+ def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrbm $vD, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm i64:$rB))]>;
+ def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrhm $vD, $rB", IIC_VecGeneral,
+ [(set v8i16:$vD,
+ (int_ppc_altivec_mtvsrhm i64:$rB))]>;
+ def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrwm $vD, $rB", IIC_VecGeneral,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_mtvsrwm i64:$rB))]>;
+ def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrdm $vD, $rB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_mtvsrdm i64:$rB))]>;
+ def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB),
+ "mtvsrqm $vD, $rB", IIC_VecGeneral,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_mtvsrqm i64:$rB))]>;
+ def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D),
+ "mtvsrbmi $vD, $D", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_mtvsrbm imm:$D))]>;
def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpdepd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD,
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10320,12 +10320,31 @@
SDLoc dl(Op);
- if (IntrinsicID == Intrinsic::thread_pointer) {
+ // Lowering intrinsics by switching on the IntrinsicID, as future intrinsics
+ // will also be lowered in this manner.
+ switch(IntrinsicID) {
+ case Intrinsic::thread_pointer:
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
+
+ case Intrinsic::ppc_altivec_mtvsrbm: {
+ // The llvm.ppc.altivec.mtvsrbm intrinsic can correspond to two different
+ // instructions: one that takes an immediate as an operand (mtvsrbmi)
+ // and one that does not (mtvsrbm).
+ // For mtvsrbmi, the immediate value must fit within 16 bits (up to 255).
+ // If the immediate exceeds these bits, we mask out the remaining bits.
+ SDValue ImmOperand = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(ImmOperand))
+ break;
+
+ unsigned Imm =
+ (cast<ConstantSDNode>(ImmOperand)->getZExtValue()) & 0xFF;
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstant(Imm, dl, MVT::i64));
}
+ } // End switch(Intrinsic ID).
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -435,6 +435,18 @@
def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+ // P10 Move to VSR with Mask Intrinsics.
+ def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">,
+ Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>;
+
// P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -12,6 +12,7 @@
vector unsigned long long vulla, vullb, vullc;
vector unsigned __int128 vui128a, vui128b, vui128c;
unsigned int uia;
+unsigned long long ulla;
unsigned int test_vec_extractm_uc(void) {
// CHECK: @llvm.ppc.altivec.vextractbm(<16 x i8>
@@ -43,6 +44,64 @@
return vec_extractm(vui128a);
}
+vector unsigned char test_vec_genbm(void) {
+ // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_genbm(ulla);
+}
+
+vector unsigned char test_vec_genbm_imm(void) {
+ // CHECK: store i64 1
+ // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_genbm(1);
+}
+
+vector unsigned char test_vec_genbm_imm2(void) {
+ // CHECK: store i64 255
+ // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_genbm(255);
+}
+
+vector unsigned char test_vec_genbm_imm3(void) {
+ // CHECK: store i64 256
+ // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_genbm(256);
+}
+
+vector unsigned char test_vec_genbm_imm4(void) {
+ // CHECK: store i64 266
+ // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_genbm(266);
+}
+
+vector unsigned short test_vec_genhm(void) {
+ // CHECK: @llvm.ppc.altivec.mtvsrhm(i64
+ // CHECK-NEXT: ret <8 x i16>
+ return vec_genhm(ulla);
+}
+
+vector unsigned int test_vec_genwm(void) {
+ // CHECK: @llvm.ppc.altivec.mtvsrwm(i64
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_genwm(ulla);
+}
+
+vector unsigned long long test_vec_gendm(void) {
+ // CHECK: @llvm.ppc.altivec.mtvsrdm(i64
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_gendm(ulla);
+}
+
+vector unsigned __int128 test_vec_genqm(void) {
+ // CHECK: @llvm.ppc.altivec.mtvsrqm(i64
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_genqm(ulla);
+}
+
vector unsigned long long test_vpdepd(void) {
// CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
// CHECK-NEXT: ret <2 x i64>
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16791,6 +16791,33 @@
return __builtin_altivec_vextractqm(__a);
}
+/* vec_gen[b|h|w|d|q]m */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_genbm(unsigned long long __bm) {
+ return __builtin_altivec_mtvsrbm(__bm);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_genhm(unsigned long long __bm) {
+ return __builtin_altivec_mtvsrhm(__bm);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_genwm(unsigned long long __bm) {
+ return __builtin_altivec_mtvsrwm(__bm);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_gendm(unsigned long long __bm) {
+ return __builtin_altivec_mtvsrdm(__bm);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_genqm(unsigned long long __bm) {
+ return __builtin_altivec_mtvsrqm(__bm);
+}
+
/* vec_pdep */
static __inline__ vector unsigned long long __ATTRS_o_ai
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,13 @@
BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
+// P10 Move to VSR with Mask built-ins.
+BUILTIN(__builtin_altivec_mtvsrbm, "V16UcULLi", "")
+BUILTIN(__builtin_altivec_mtvsrhm, "V8UsULLi", "")
+BUILTIN(__builtin_altivec_mtvsrwm, "V4UiULLi", "")
+BUILTIN(__builtin_altivec_mtvsrdm, "V2ULLiULLi", "")
+BUILTIN(__builtin_altivec_mtvsrqm, "V1ULLLiULLi", "")
+
// P10 Vector Parallel Bits built-ins.
BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits