nemanjai updated this revision to Diff 341456. nemanjai added a comment. Changed `rsqrt` to compute an actual reciprocal square root (`1.0 / sqrt(x)`) rather than just a refined `sqrt` estimate.
I have verified that the generated code is equivalent to GCC's and that the results produced are the same. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D101209/new/ https://reviews.llvm.org/D101209 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/altivec.h clang/test/CodeGen/builtins-ppc-altivec.c clang/test/CodeGen/builtins-ppc-vsx.c
Index: clang/test/CodeGen/builtins-ppc-vsx.c =================================================================== --- clang/test/CodeGen/builtins-ppc-vsx.c +++ clang/test/CodeGen/builtins-ppc-vsx.c @@ -2283,3 +2283,21 @@ // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]]) __builtin_vsx_xvcpsgndp(a, b); } + +vector double test_recipdivd(vector double a, vector double b) { + // CHECK-LABEL: test_recipdivd + // CHECK: fdiv fast <2 x double> + // CHECK-LE-LABEL: test_recipdivd + // CHECK-LE: fdiv fast <2 x double> + return vec_recipdiv(a, b); +} + +vector double test_rsqrtd(vector double a, vector double b) { + // CHECK-LABEL: test_rsqrtd + // CHECK: call fast <2 x double> @llvm.sqrt.v2f64 + // CHECK: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00> + // CHECK-LE-LABEL: test_rsqrtd + // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64 + // CHECK-LE: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00> + return vec_rsqrt(a); +} Index: clang/test/CodeGen/builtins-ppc-altivec.c =================================================================== --- clang/test/CodeGen/builtins-ppc-altivec.c +++ clang/test/CodeGen/builtins-ppc-altivec.c @@ -9577,3 +9577,21 @@ // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) } + +vector float test_rsqrtf(vector float a, vector float b) { + // CHECK-LABEL: test_rsqrtf + // CHECK: call fast <4 x float> @llvm.sqrt.v4f32 + // CHECK: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> + // CHECK-LE-LABEL: test_rsqrtf + // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32 + // CHECK-LE: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> + return vec_rsqrt(a); +} + +vector float test_recipdivf(vector float a, vector float b) { + // CHECK-LABEL: test_recipdivf + // 
CHECK: fdiv fast <4 x float> + // CHECK-LE-LABEL: test_recipdivf + // CHECK-LE: fdiv fast <4 x float> + return vec_recipdiv(a, b); +} Index: clang/lib/Headers/altivec.h =================================================================== --- clang/lib/Headers/altivec.h +++ clang/lib/Headers/altivec.h @@ -8359,6 +8359,16 @@ } #endif +static vector float __ATTRS_o_ai vec_rsqrt(vector float __a) { + return __builtin_ppc_rsqrtf(__a); +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_rsqrt(vector double __a) { + return __builtin_ppc_rsqrtd(__a); +} +#endif + /* vec_vrsqrtefp */ static __inline__ __vector float __attribute__((__always_inline__)) @@ -17897,6 +17907,18 @@ return __builtin_altivec_vminsb(__a, -__a); } +static vector float __ATTRS_o_ai vec_recipdiv(vector float __a, + vector float __b) { + return __builtin_ppc_recipdivf(__a, __b); +} + +#ifdef __VSX__ +static vector double __ATTRS_o_ai vec_recipdiv(vector double __a, + vector double __b) { + return __builtin_ppc_recipdivd(__a, __b); +} +#endif + #ifdef __POWER10_VECTOR__ /* vec_extractm */ Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -15114,6 +15114,25 @@ return Builder.CreateCall(F, X); } + // Fastmath by default + case PPC::BI__builtin_ppc_recipdivf: + case PPC::BI__builtin_ppc_recipdivd: + case PPC::BI__builtin_ppc_rsqrtf: + case PPC::BI__builtin_ppc_rsqrtd: { + Builder.getFastMathFlags().setFast(); + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + + if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || + BuiltinID == PPC::BI__builtin_ppc_recipdivd) { + Value *Y = EmitScalarExpr(E->getArg(1)); + return Builder.CreateFDiv(X, Y, "recipdiv"); + } + auto *One = ConstantFP::get(ResultType, 1.0); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); + return Builder.CreateFDiv(One, Builder.CreateCall(F, X), 
"rsqrt"); + } + // FMA variations case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: Index: clang/include/clang/Basic/BuiltinsPPC.def =================================================================== --- clang/include/clang/Basic/BuiltinsPPC.def +++ clang/include/clang/Basic/BuiltinsPPC.def @@ -600,6 +600,12 @@ BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "") BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "") +// Fastmath by default builtins +BUILTIN(__builtin_ppc_rsqrtf, "V4fV4f", "") +BUILTIN(__builtin_ppc_rsqrtd, "V2dV2d", "") +BUILTIN(__builtin_ppc_recipdivf, "V4fV4fV4f", "") +BUILTIN(__builtin_ppc_recipdivd, "V2dV2dV2d", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits