nemanjai updated this revision to Diff 341456.
nemanjai added a comment.

Changed `rsqrt` to be an actual reciprocal rather than just a refined `sqrt` 
estimate.

I have verified that the code generated is equivalent to gcc's and the results 
produced are the same.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101209/new/

https://reviews.llvm.org/D101209

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-altivec.c
  clang/test/CodeGen/builtins-ppc-vsx.c

Index: clang/test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -2283,3 +2283,21 @@
 // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]])
   __builtin_vsx_xvcpsgndp(a, b);
 }
+
+vector double test_recipdivd(vector double a, vector double b) {
+  // CHECK-LABEL: test_recipdivd
+  // CHECK: fdiv fast <2 x double>
+  // CHECK-LE-LABEL: test_recipdivd
+  // CHECK-LE: fdiv fast <2 x double>
+  return vec_recipdiv(a, b);
+}
+
+vector double test_rsqrtd(vector double a, vector double b) {
+  // CHECK-LABEL: test_rsqrtd
+  // CHECK: call fast <2 x double> @llvm.sqrt.v2f64
+  // CHECK: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
+  // CHECK-LE-LABEL: test_rsqrtd
+  // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64
+  // CHECK-LE: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
+  return vec_rsqrt(a);
+}
Index: clang/test/CodeGen/builtins-ppc-altivec.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-altivec.c
+++ clang/test/CodeGen/builtins-ppc-altivec.c
@@ -9577,3 +9577,21 @@
   // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1
   // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}})
 }
+
+vector float test_rsqrtf(vector float a, vector float b) {
+  // CHECK-LABEL: test_rsqrtf
+  // CHECK: call fast <4 x float> @llvm.sqrt.v4f32
+  // CHECK: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  // CHECK-LE-LABEL: test_rsqrtf
+  // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32
+  // CHECK-LE: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  return vec_rsqrt(a);
+}
+
+vector float test_recipdivf(vector float a, vector float b) {
+  // CHECK-LABEL: test_recipdivf
+  // CHECK: fdiv fast <4 x float>
+  // CHECK-LE-LABEL: test_recipdivf
+  // CHECK-LE: fdiv fast <4 x float>
+  return vec_recipdiv(a, b);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -8359,6 +8359,16 @@
 }
 #endif
 
+static vector float __ATTRS_o_ai vec_rsqrt(vector float __a) {
+  return __builtin_ppc_rsqrtf(__a);
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_rsqrt(vector double __a) {
+  return __builtin_ppc_rsqrtd(__a);
+}
+#endif
+
 /* vec_vrsqrtefp */
 
 static __inline__ __vector float __attribute__((__always_inline__))
@@ -17897,6 +17907,18 @@
   return __builtin_altivec_vminsb(__a, -__a);
 }
 
+static vector float __ATTRS_o_ai vec_recipdiv(vector float __a,
+                                              vector float __b) {
+  return __builtin_ppc_recipdivf(__a, __b);
+}
+
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_recipdiv(vector double __a,
+                                               vector double __b) {
+  return __builtin_ppc_recipdivd(__a, __b);
+}
+#endif
+
 #ifdef __POWER10_VECTOR__
 
 /* vec_extractm */
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15114,6 +15114,25 @@
     return Builder.CreateCall(F, X);
   }
 
+  // Fastmath by default
+  case PPC::BI__builtin_ppc_recipdivf:
+  case PPC::BI__builtin_ppc_recipdivd:
+  case PPC::BI__builtin_ppc_rsqrtf:
+  case PPC::BI__builtin_ppc_rsqrtd: {
+    Builder.getFastMathFlags().setFast();
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+
+    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
+        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
+      Value *Y = EmitScalarExpr(E->getArg(1));
+      return Builder.CreateFDiv(X, Y, "recipdiv");
+    }
+    auto *One = ConstantFP::get(ResultType, 1.0);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+    return Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
+  }
+
   // FMA variations
   case PPC::BI__builtin_vsx_xvmaddadp:
   case PPC::BI__builtin_vsx_xvmaddasp:
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -600,6 +600,12 @@
 BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "")
 BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "")
 
+// Fastmath by default builtins
+BUILTIN(__builtin_ppc_rsqrtf, "V4fV4f", "")
+BUILTIN(__builtin_ppc_rsqrtd, "V2dV2d", "")
+BUILTIN(__builtin_ppc_recipdivf, "V4fV4fV4f", "")
+BUILTIN(__builtin_ppc_recipdivd, "V2dV2dV2d", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to