Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added subscribers: danielkiss, steven.zhang, kbarton.
Conanap requested review of this revision.

This patch implements 128-bit Binary Vector Rotate builtins for PowerPC10.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86819

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the builtins for vector rotate instructions
+; on Power10.
+
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vrlq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrlq v2, v3, v2
+; CHECK-NEXT:    blr
+  %shl.i = shl <1 x i128> %y, %x
+  %sub.i = sub <1 x i128> <i128 128>, %x
+  %lshr.i = lshr <1 x i128> %y, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrlqmi v3, v2, v4
+; CHECK-NEXT:    vmr v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrlqnm v2, v2, v3
+; CHECK-NEXT:    xxland v2, v2, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+  %tmp = and <1 x i128> %0, %c
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1285,19 +1285,25 @@
                                "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
-  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
-  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
-                        (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
-                        "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
-                        RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
   def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
   def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
   def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
-  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+                               [(set v1i128:$vD,
+                                   (int_ppc_altivec_vrlqnm v1i128:$vA,
+                                                           v1i128:$vB))]>;
+  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+                        (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+                        "vrlqmi $vD, $vA, $vB", IIC_VecFP,
+                        [(set v1i128:$vD,
+                           (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+                                                   v1i128:$vDi))]>,
+                        RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
 }
 
 //---------------------------- Anonymous Patterns ----------------------------//
@@ -1344,6 +1350,9 @@
              (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
   def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
              (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
+  def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+            (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
 }
 
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -884,6 +884,7 @@
       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
     }
 
     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -949,6 +949,14 @@
       PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],
                             [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                             [IntrNoMem]>;
+def int_ppc_altivec_vrlqnm :
+      PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
+                            [llvm_v1i128_ty, llvm_v1i128_ty],
+                            [IntrNoMem]>;
+def int_ppc_altivec_vrlqmi :
+      PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
+                            [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
+                            [IntrNoMem]>;
 
 // Vector Divide Extended Intrinsics.
 def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -1,10 +1,10 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -target-feature +vsx \
 // RUN:   -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \
-// RUN:   -o - | FileCheck %s -check-prefixes=CHECK-BE,CHECK
+// RUN:   -o - | FileCheck %s -check-prefixes=CHECK-BE,CHECK -check-prefix=CHECK-COMMON
 // RUN: %clang_cc1 -target-feature +vsx \
 // RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
-// RUN:   -o - | FileCheck %s -check-prefixes=CHECK-LE,CHECK
+// RUN:   -o - | FileCheck %s -check-prefixes=CHECK-LE,CHECK -check-prefix=CHECK-COMMON
 
 #include <altivec.h>
 
@@ -18,6 +18,7 @@
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
 vector unsigned __int128 vui128a, vui128b, vui128c;
+vector signed __int128 vsi128a, vsi128b, vsi128c;
 vector float vfa, vfb;
 vector double vda, vdb;
 signed int *iap;
@@ -971,3 +972,49 @@
   // CHECK: ret <1 x i128>
   return vec_xl_zext(llb, ullap);
 }
+
+vector signed __int128 test_vec_rl_s128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rl_s128(
+  // CHECK-COMMON: sub <1 x i128>
+  // CHECK-COMMON-NEXT: lshr <1 x i128>
+  // CHECK-COMMON-NEXT: or <1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rl(vsi128a, vsi128b);
+}
+
+vector unsigned __int128 test_vec_rl_u128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rl_u128(
+  // CHECK-COMMON: sub <1 x i128>
+  // CHECK-COMMON: lshr <1 x i128>
+  // CHECK-COMMON: or <1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_rlnm_s128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rlnm_s128(
+  // CHECK-COMMON: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rlnm(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlnm_u128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rlnm_u128(
+  // CHECK-COMMON: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rlnm(vui128a, vui128b, vui128c);
+}
+
+vector signed __int128 test_vec_rlmi_s128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rlmi_s128(
+  // CHECK-COMMON: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rlmi(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlmi_u128(void) {
+  // CHECK-COMMON-LABEL: @test_vec_rlmi_u128(
+  // CHECK-COMMON: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-COMMON-NEXT: ret <1 x i128>
+  return vec_rlmi(vui128a, vui128b, vui128c);
++
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -7675,6 +7675,18 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector signed __int128)) - __a));
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a));
+}
+#endif
+
 /* vec_rlmi */
 #ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
@@ -7688,8 +7700,24 @@
          vector unsigned long long __c) {
   return __builtin_altivec_vrldmi(__a, __c, __b);
 }
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+#endif
 
 /* vec_rlnm */
+#ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_rlnm(vector unsigned int __a, vector unsigned int __b,
          vector unsigned int __c) {
@@ -7705,6 +7733,26 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  vector unsigned __int128 ShiftRotation = {0x40};
+  vector unsigned __int128 ShiftMask = {0x48};
+  return __builtin_altivec_vrlqnm(__a, ((__c << ShiftMask) |
+                                        (__b << ShiftRotation)));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  vector unsigned __int128 ShiftRotation = {0x40};
+  vector unsigned __int128 ShiftMask = {0x48};
+  return __builtin_altivec_vrlqnm(__a, ((__c << ShiftMask) |
+                                        (__b << ShiftRotation)));
+}
+#endif
+
 /* vec_vrlb */
 
 static __inline__ vector signed char __ATTRS_o_ai
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -365,6 +365,10 @@
 BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
 BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")
 
+// P10 Vector rotate built-ins.
+BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
+BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to