Conanap updated this revision to Diff 293474.
Conanap marked 2 inline comments as done.
Conanap added a comment.
Changed the implementation of vrlqnm as per Nemanja's suggestion.
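
For context, a minimal usage sketch of the new 128-bit overloads (the function
and variable names below are illustrative only, not part of the patch; assumes
a Power10 target, e.g. -mcpu=pwr10 on powerpc64le-unknown-linux-gnu):

  #include <altivec.h>

  vector unsigned __int128 demo(vector unsigned __int128 Val,
                                vector unsigned __int128 Amt,
                                vector unsigned __int128 Ctl,
                                vector unsigned __int128 Ins) {
    // Rotate each element of Val left by Amt; selected as vrlq.
    vector unsigned __int128 R = vec_rl(Val, Amt);
    // Rotate, then AND with a mask described by the packed control
    // operands; selected as vrlqnm.
    R = vec_rlnm(R, Amt, Ctl);
    // Rotate R and insert it under a mask into Ins; Ctl carries the mask
    // bounds and rotate amount. Selected as vrlqmi.
    return vec_rlmi(R, Ins, Ctl);
  }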
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86819/new/
https://reviews.llvm.org/D86819
Files:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+
+; This test case aims to test the vector rotate instructions and intrinsics
+; on Power10.
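+; The first three functions use the shl/lshr/or idiom that the DAG combiner
+; folds into ISD::ROTL (matched to vrlq); the last two exercise the vrlqmi
+; and vrlqnm intrinsics directly.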
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
+; CHECK-LABEL: test_vrlq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+ %shl.i = shl <1 x i128> %y, %x
+ %sub.i = sub <1 x i128> <i128 128>, %x
+ %lshr.i = lshr <1 x i128> %y, %sub.i
+ %tmp = or <1 x i128> %shl.i, %lshr.i
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+ %shl.i = shl <1 x i128> <i128 16>, %x
+ %sub.i = sub <1 x i128> <i128 128>, %x
+ %lshr.i = lshr <1 x i128> <i128 16>, %sub.i
+ %tmp = or <1 x i128> %shl.i, %lshr.i
+ ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_non_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+ %shl.i = shl <1 x i128> <i128 4>, %x
+ %sub.i = sub <1 x i128> <i128 128>, %x
+ %lshr.i = lshr <1 x i128> <i128 4>, %sub.i
+ %tmp = or <1 x i128> %shl.i, %lshr.i
+ ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrlqmi v3, v2, v4
+; CHECK-NEXT: vmr v2, v3
+; CHECK-NEXT: blr
+entry:
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+ ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrlqnm v2, v2, v3
+; CHECK-NEXT: xxland v2, v2, v4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+ %tmp = and <1 x i128> %0, %c
+ ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1446,19 +1446,25 @@
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
- def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
- def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
- "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
- def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+ def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+ def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqnm v1i128:$vA,
+ v1i128:$vB))]>;
+ def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+ (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+ "vrlqmi $vD, $vA, $vB", IIC_VecFP,
+ [(set v1i128:$vD,
+ (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+ v1i128:$vDi))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
}
let Predicates = [IsISA3_1, HasVSX] in {
@@ -1510,6 +1516,9 @@
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
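+  // ISD::ROTL is marked Legal for v1i128 in PPCISelLowering.cpp, so generic
+  // rotl nodes reach instruction selection and are matched to VRLQ here.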
+ def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
}
let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -890,6 +890,7 @@
setOperationAction(ISD::SREM, MVT::v4i32, Legal);
setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
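+ // Treating ROTL as Legal lets the DAG combiner fold the or(shl, lshr)
+ // rotate idiom into ISD::ROTL for v1i128, which ISel then matches to VRLQ.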
+ setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
}
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1002,6 +1002,15 @@
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
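+// Vector Rotate Quadword Intrinsics.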
+def int_ppc_altivec_vrlqnm :
+ PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
+ [llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+def int_ppc_altivec_vrlqmi :
+ PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
+ [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
+ [IntrNoMem]>;
+
// Vector Divide Extended Intrinsics.
def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,7 +17,7 @@
vector unsigned int vuia, vuib, vuic;
vector signed long long vslla, vsllb;
vector unsigned long long vulla, vullb, vullc;
-vector signed __int128 vsi128a, vsi128b;
+vector signed __int128 vsi128a, vsi128b, vsi128c;
vector unsigned __int128 vui128a, vui128b, vui128c;
vector float vfa, vfb;
vector double vda, vdb;
@@ -1157,3 +1157,49 @@
// CHECK: ret <1 x i128>
return vec_xl_zext(llb, ullap);
}
+
+vector signed __int128 test_vec_rl_s128(void) {
+ // CHECK-LABEL: @test_vec_rl_s128(
+ // CHECK: sub <1 x i128>
+ // CHECK-NEXT: lshr <1 x i128>
+ // CHECK-NEXT: or <1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rl(vsi128a, vsi128b);
+}
+
+vector unsigned __int128 test_vec_rl_u128(void) {
+ // CHECK-LABEL: @test_vec_rl_u128(
+ // CHECK: sub <1 x i128>
+ // CHECK: lshr <1 x i128>
+ // CHECK: or <1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_rlnm_s128(void) {
+ // CHECK-LABEL: @test_vec_rlnm_s128(
+ // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rlnm(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlnm_u128(void) {
+ // CHECK-LABEL: @test_vec_rlnm_u128(
+ // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rlnm(vui128a, vui128b, vui128c);
+}
+
+vector signed __int128 test_vec_rlmi_s128(void) {
+ // CHECK-LABEL: @test_vec_rlmi_s128(
+ // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rlmi(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlmi_u128(void) {
+ // CHECK-LABEL: @test_vec_rlmi_u128(
+ // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+ // CHECK-NEXT: ret <1 x i128>
+ return vec_rlmi(vui128a, vui128b, vui128c);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -7789,6 +7789,18 @@
}
#endif
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return (__a << __b) |
+         (__a >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __b));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  // Use the unsigned overload so the right shift is logical, not arithmetic.
+  return (vector signed __int128)vec_rl((vector unsigned __int128)__a, __b);
+}
+#endif
+
/* vec_rlmi */
#ifdef __POWER9_VECTOR__
static __inline__ vector unsigned int __ATTRS_o_ai
@@ -7802,8 +7814,24 @@
vector unsigned long long __c) {
return __builtin_altivec_vrldmi(__a, __c, __b);
}
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
+ vector unsigned __int128 __c) {
+ return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
+ vector signed __int128 __c) {
+ return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+#endif
/* vec_rlnm */
+#ifdef __POWER9_VECTOR__
static __inline__ vector unsigned int __ATTRS_o_ai
vec_rlnm(vector unsigned int __a, vector unsigned int __b,
vector unsigned int __c) {
@@ -7819,6 +7847,42 @@
}
#endif
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+ vector unsigned __int128 __c) {
+ // Merge __b and __c using an appropriate shuffle.
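+  // vrlqnm expects the rotate count and the mask begin/end bounds packed
+  // into a single control vector; the shuffle below moves the relevant
+  // low-order bytes of __b and __c into the byte positions the instruction
+  // reads them from.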
+ vector unsigned char TmpB = (vector unsigned char)__b;
+ vector unsigned char TmpC = (vector unsigned char)__c;
+ vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
+ 0, -1, -1, -1, -1, -1);
+#else
+ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
+ -1, -1, -1, -1, -1, -1, -1);
+#endif
+ return __builtin_altivec_vrlqnm(__a, MaskAndShift);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
+ vector signed __int128 __c) {
+ // Merge __b and __c using an appropriate shuffle.
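+  // See the unsigned vec_rlnm overload above for the control-vector layout.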
+ vector unsigned char TmpB = (vector unsigned char)__b;
+ vector unsigned char TmpC = (vector unsigned char)__c;
+ vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
+ 0, -1, -1, -1, -1, -1);
+#else
+ __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
+ -1, -1, -1, -1, -1, -1, -1);
+#endif
+ return __builtin_altivec_vrlqnm(__a, MaskAndShift);
+}
+#endif
+
/* vec_vrlb */
static __inline__ vector signed char __ATTRS_o_ai
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -390,6 +390,10 @@
BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")
+// P10 Vector rotate built-ins.
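+// The type string 'V1ULLLi' denotes a vector of one unsigned __int128
+// ('LLLi' encodes __int128).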
+BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
+BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")
+
// VSX built-ins.
BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")