[PATCH] D88278: [PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp).

EsmeYi via Phabricator via cfe-commits Thu, 24 Sep 2020 22:42:08 -0700

Esme created this revision.
Esme added reviewers: steven.zhang, masoud.ataei, shchenz, jsji, qiucf.
Herald added subscribers: llvm-commits, cfe-commits, kbarton, hiraditya, 
nemanjai.
Herald added projects: clang, LLVM.
Esme requested review of this revision.


This patch implements builtins for the VSX instructions `xvtdivdp`, `xvtdivsp`,
`xvtsqrtdp`, and `xvtsqrtsp`.
The builtins corresponding to these instructions are:

  int vec_test_swdiv(vector double v1, vector double v2);
  int vec_test_swdivs(vector float v1, vector float v2);
  int vec_test_swsqrt(vector double v1);
  int vec_test_swsqrts(vector float v1);

This patch depends on D88274 <https://reviews.llvm.org/D88274>, which fixes the 
bug in copying from CRRC to GRC.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88278

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-vsx.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/vsx_builtins.ll

Index: llvm/test/CodeGen/PowerPC/vsx_builtins.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -54,3 +54,55 @@
 }
 ; Function Attrs: nounwind readnone
 declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*)
+
+define i32 @test_vec_test_swdiv(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vec_test_swdiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtdivdp cr0, v2, v3
+; CHECK-NEXT:    mfocrf r3, 128
+; CHECK-NEXT:    srwi r3, r3, 28
+; CHECK-NEXT:    blr
+  entry:
+    %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)
+    ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtdivdp(<2 x double>, <2 x double>)
+
+define i32 @test_vec_test_swdivs(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vec_test_swdivs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtdivsp cr0, v2, v3
+; CHECK-NEXT:    mfocrf r3, 128
+; CHECK-NEXT:    srwi r3, r3, 28
+; CHECK-NEXT:    blr
+  entry:
+    %0 = tail call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %a, <4 x float> %b)
+    ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtdivsp(<4 x float>, <4 x float>)
+
+define i32 @test_vec_test_swsqrt(<2 x double> %a) {
+; CHECK-LABEL: test_vec_test_swsqrt:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    mfocrf r3, 128
+; CHECK-NEXT:    srwi r3, r3, 28
+; CHECK-NEXT:    blr
+  entry:
+    %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %a)
+    ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
+
+define i32 @test_vec_test_swsqrts(<4 x float> %a) {
+; CHECK-LABEL: test_vec_test_swsqrts:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtsp cr0, v2
+; CHECK-NEXT:    mfocrf r3, 128
+; CHECK-NEXT:    srwi r3, r3, 28
+; CHECK-NEXT:    blr
+  entry:
+    %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %a)
+    ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>)
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2581,6 +2581,16 @@
 def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
           (XVDIVDP $A, $B)>;
 
+// Vector test software functions.
+def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
+          (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
+          (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
+          (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
+          (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
+
 // Reciprocal estimate
 def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
           (XVRESP $A)>;
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1249,6 +1249,16 @@
 def int_ppc_vsx_xvtlsbb :
       PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty],
                             [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivdp :
+      PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty],
+                            [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivsp :
+      PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty],
+                            [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtdp :
+      PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtsp :
+      PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
 def int_ppc_vsx_xxeval :
       PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
                            [llvm_v2i64_ty, llvm_v2i64_ty,
Index: clang/test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -52,6 +52,7 @@
 vector signed __int128 res_vslll;
 
 double res_d;
+int res_i;
 float res_af[4];
 double res_ad[2];
 signed char res_asc[16];
@@ -878,6 +879,23 @@
 // CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
 
+  res_i = vec_test_swsqrt(vd);
+// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
+
+  res_i = vec_test_swsqrts(vf);
+// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
+
+  res_i = vec_test_swdiv(vd, vd);
+// CHECK: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+
+  res_i = vec_test_swdivs(vf, vf);
+// CHECK: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+
+
   dummy();
 // CHECK: call void @dummy()
 // CHECK-LE: call void @dummy()
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3504,6 +3504,20 @@
 }
 #endif __POWER10_VECTOR__
 
+/* vec_xvtdiv */
+
+#ifdef __VSX__
+static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a,
+                                                  vector double __b) {
+  return __builtin_vsx_xvtdivdp(__a, __b);
+}
+
+static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a,
+                                                   vector float __b) {
+  return __builtin_vsx_xvtdivsp(__a, __b);
+}
+#endif
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
@@ -8057,6 +8071,18 @@
   return __builtin_altivec_vrsqrtefp(__a);
 }
 
+/* vec_xvtsqrt */
+
+#ifdef __VSX__
+static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) {
+  return __builtin_vsx_xvtsqrtdp(__a);
+}
+
+static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) {
+  return __builtin_vsx_xvtsqrtsp(__a);
+}
+#endif
+
 /* vec_sel */
 
 #define __builtin_altivec_vsel_4si vec_sel
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -558,6 +558,11 @@
 
 BUILTIN(__builtin_vsx_xvtlsbb, "iV16UcUi", "")
 
+BUILTIN(__builtin_vsx_xvtdivdp, "iV2dV2d", "")
+BUILTIN(__builtin_vsx_xvtdivsp, "iV4fV4f", "")
+BUILTIN(__builtin_vsx_xvtsqrtdp, "iV2d", "")
+BUILTIN(__builtin_vsx_xvtsqrtsp, "iV4f", "")
+
 // P10 Vector Permute Extended built-in.
 BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D88278: [PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp).

Reply via email to