Author: Wenju He Date: 2025-10-20T10:08:35+08:00 New Revision: 377148fd60df55e486dc72b26248006ae092725a
URL: https://github.com/llvm/llvm-project/commit/377148fd60df55e486dc72b26248006ae092725a DIFF: https://github.com/llvm/llvm-project/commit/377148fd60df55e486dc72b26248006ae092725a.diff LOG: [libclc] Move functions definition from header clc_sincos_piby4.inc into clc_sincos_helpers.cl (#164028) Inline functions defined in clc_sincos_piby4.inc lack the static specifier and are deleted by EliminateAvailableExternallyPass when not inlined. This PR fixes the problem by removing inline and moving the function definitions into clc/lib/generic/math/clc_sincos_helpers.cl. It makes sense to put all sin/cos helper definitions in one file, clc_sincos_helpers.cl. Added: Modified: libclc/clc/include/clc/math/clc_sincos_helpers.inc libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc libclc/clc/lib/generic/math/clc_cos.cl libclc/clc/lib/generic/math/clc_cospi.cl libclc/clc/lib/generic/math/clc_sin.cl libclc/clc/lib/generic/math/clc_sincos_helpers.inc libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc libclc/clc/lib/generic/math/clc_sinpi.cl libclc/clc/lib/generic/math/clc_tan.cl libclc/clc/lib/generic/math/clc_tanpi.cl Removed: libclc/clc/include/clc/math/clc_sincos_piby4.h libclc/clc/include/clc/math/clc_sincos_piby4.inc ################################################################################ diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.inc b/libclc/clc/include/clc/math/clc_sincos_helpers.inc index 4daff92955cd7..0a3b816cb8c89 100644 --- a/libclc/clc/include/clc/math/clc_sincos_helpers.inc +++ b/libclc/clc/include/clc/math/clc_sincos_helpers.inc @@ -10,6 +10,11 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x, __CLC_FLOATN y); _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x, __CLC_FLOATN y); + +_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x, + private __CLC_FLOATN *sinval, + private __CLC_FLOATN *cosval); + _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x, __CLC_INTN regn); diff 
--git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc index 09c6e1c965f64..15934cab32751 100644 --- a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc +++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc @@ -6,6 +6,15 @@ // //===----------------------------------------------------------------------===// +_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x, + __CLC_DOUBLEN xx, + private __CLC_DOUBLEN *sinval, + private __CLC_DOUBLEN *cosval); + +_CLC_DECL _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx, + private __CLC_DOUBLEN *leadval, + private __CLC_DOUBLEN *tailval); + _CLC_DECL _CLC_OVERLOAD void __clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, private __CLC_DOUBLEN *rr, diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.h b/libclc/clc/include/clc/math/clc_sincos_piby4.h deleted file mode 100644 index 50608ae24e947..0000000000000 --- a/libclc/clc/include/clc/math/clc_sincos_piby4.h +++ /dev/null @@ -1,14 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/math/clc_fma.h> -#include <clc/math/clc_mad.h> -#include <clc/math/math.h> - -#define __CLC_BODY <clc/math/clc_sincos_piby4.inc> -#include <clc/math/gentype.inc> diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.inc b/libclc/clc/include/clc/math/clc_sincos_piby4.inc deleted file mode 100644 index 91ec518b70e97..0000000000000 --- a/libclc/clc/include/clc/math/clc_sincos_piby4.inc +++ /dev/null @@ -1,174 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#if __CLC_FPSIZE == 32 - -// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4] -_CLC_INLINE _CLC_OVERLOAD void -__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval, - private __CLC_GENTYPE *cosval) { - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... - // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. 
- - const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F; - const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F; - const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F; - const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F; - - const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F; - const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F; - const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F; - const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F; - - __CLC_GENTYPE x2 = x * x; - - *sinval = __clc_mad( - x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1), - x); - *cosval = __clc_mad( - x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1), - __clc_mad(x2, -0.5f, 1.0f)); -} - -#elif __CLC_FPSIZE == 64 - -_CLC_INLINE _CLC_OVERLOAD void -__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx, - private __CLC_GENTYPE *sinval, - private __CLC_GENTYPE *cosval) { - // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... - // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... - // = x * f(w) - // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... - // We use a minimax approximation of (f(w) - 1) / w - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we add a correction - // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) - // is an approximation to cos(x)*sin(xx) valid because - // xx is tiny relative to x. - - // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... - // = f(w) - // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... - // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) - // because this produces an expansion in even powers of x. - // If xx (the tail of x) is non-zero, we subtract a correction - // term g(x,xx) = x*xx to the result, where g(x,xx) - // is an approximation to sin(x)*sin(xx) valid because - // xx is tiny relative to x. 
- - const __CLC_GENTYPE sc1 = -0.166666666666666646259241729; - const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2; - const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3; - const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5; - const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7; - const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9; - - const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1; - const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2; - const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4; - const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6; - const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8; - const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10; - - __CLC_GENTYPE x2 = x * x; - __CLC_GENTYPE x3 = x2 * x; - __CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2; - __CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r; - - __CLC_GENTYPE sp = __clc_fma( - __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); - - __CLC_GENTYPE cp = - t + - __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5), - x2, cc4), - x2, cc3), - x2, cc2), - x2, cc1), - x2 * x2, __clc_fma(x, xx, (1.0 - t) - r)); - - *sinval = - x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx)); - *cosval = cp; -} - -_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x, - __CLC_GENTYPE xx, - private __CLC_GENTYPE *leadval, - private __CLC_GENTYPE *tailval) { - // 0x3fe921fb54442d18 - const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01; - // 0x3c81a62633145c06 - const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17; - - // In order to maintain relative precision transform using the identity: - // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. - // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. - - __CLC_LONGN ca = x > 0.68; - __CLC_LONGN cb = x < -0.68; - __CLC_GENTYPE transform = ca ? 1.0 : 0.0; - transform = cb ? 
-1.0 : transform; - - __CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) + - __clc_fma(-transform, xx, piby4_tail); - __CLC_LONGN c = ca | cb; - x = c ? tx : x; - xx = c ? 0.0 : xx; - - // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. - __CLC_GENTYPE t1 = x; - __CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x); - - __CLC_GENTYPE a = __clc_fma(r, - __clc_fma(r, 0.224044448537022097264602535574e-3, - -0.229345080057565662883358588111e-1), - 0.372379159759792203640806338901e0); - - __CLC_GENTYPE b = - __clc_fma(r, - __clc_fma(r, - __clc_fma(r, -0.232371494088563558304549252913e-3, - 0.260656620398645407524064091208e-1), - -0.515658515729031149329237816945e0), - 0.111713747927937668539901657944e1); - - __CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx); - - __CLC_GENTYPE tp = t1 + t2; - - // Compute -1.0/(t1 + t2) accurately - __CLC_GENTYPE z1 = - __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L); - __CLC_GENTYPE z2 = t2 - (z1 - t1); - __CLC_GENTYPE trec = -MATH_RECIP(tp); - __CLC_GENTYPE trec_top = - __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L); - - __CLC_GENTYPE tpr = __clc_fma( - __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top); - - __CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp)); - __CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0); - - *leadval = c ? tpt : tp; - *tailval = c ? 
tptr : tpr; -} - -#endif diff --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl index e7e4d6ad39ede..5529ec411a195 100644 --- a/libclc/clc/lib/generic/math/clc_cos.cl +++ b/libclc/clc/lib/generic/math/clc_cos.cl @@ -10,7 +10,6 @@ #include <clc/float/definitions.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/math.h> #include <clc/relational/clc_isinf.h> #include <clc/relational/clc_isnan.h> diff --git a/libclc/clc/lib/generic/math/clc_cospi.cl b/libclc/clc/lib/generic/math/clc_cospi.cl index 07e1b49cc9e02..6a10171c723d0 100644 --- a/libclc/clc/lib/generic/math/clc_cospi.cl +++ b/libclc/clc/lib/generic/math/clc_cospi.cl @@ -11,7 +11,6 @@ #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/math.h> #define __CLC_BODY <clc_cospi.inc> diff --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl index 741383f94c456..99338c95eb60c 100644 --- a/libclc/clc/lib/generic/math/clc_sin.cl +++ b/libclc/clc/lib/generic/math/clc_sin.cl @@ -11,7 +11,6 @@ #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/clc_trunc.h> #include <clc/math/math.h> #include <clc/math/tables.h> diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc index 9a46170a3db38..2a71b5626ccc5 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc @@ -74,6 +74,43 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x, return ret; } +// Evaluate single precisions sin and cos of value in interval [-pi/4, pi/4] +_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x, + private __CLC_FLOATN *sinval, + 
private __CLC_FLOATN *cosval) { + // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... + // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... + // = x * f(w) + // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... + // We use a minimax approximation of (f(w) - 1) / w + // because this produces an expansion in even powers of x. + + // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... + // = f(w) + // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... + // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) + // because this produces an expansion in even powers of x. + + const __CLC_FLOATN sc1 = -0.166666666638608441788607926e0F; + const __CLC_FLOATN sc2 = 0.833333187633086262120839299e-2F; + const __CLC_FLOATN sc3 = -0.198400874359527693921333720e-3F; + const __CLC_FLOATN sc4 = 0.272500015145584081596826911e-5F; + + const __CLC_FLOATN cc1 = 0.41666666664325175238031e-1F; + const __CLC_FLOATN cc2 = -0.13888887673175665567647e-2F; + const __CLC_FLOATN cc3 = 0.24800600878112441958053e-4F; + const __CLC_FLOATN cc4 = -0.27301013343179832472841e-6F; + + __CLC_FLOATN x2 = x * x; + + *sinval = __clc_mad( + x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1), + x); + *cosval = __clc_mad( + x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1), + __clc_mad(x2, -0.5f, 1.0f)); +} + _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x, __CLC_INTN regn) { // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4]. 
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc index 8fae90c9cc5a5..e029c6dcfaa02 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc @@ -6,6 +6,129 @@ // //===----------------------------------------------------------------------===// +_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x, + __CLC_DOUBLEN xx, + private __CLC_DOUBLEN *sinval, + private __CLC_DOUBLEN *cosval) { + // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... + // = x * (1 - x^2/3! + x^4/5! - x^6/7! ... + // = x * f(w) + // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... + // We use a minimax approximation of (f(w) - 1) / w + // because this produces an expansion in even powers of x. + // If xx (the tail of x) is non-zero, we add a correction + // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) + // is an approximation to cos(x)*sin(xx) valid because + // xx is tiny relative to x. + + // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... + // = f(w) + // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... + // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) + // because this produces an expansion in even powers of x. + // If xx (the tail of x) is non-zero, we subtract a correction + // term g(x,xx) = x*xx to the result, where g(x,xx) + // is an approximation to sin(x)*sin(xx) valid because + // xx is tiny relative to x. 
+ + const __CLC_DOUBLEN sc1 = -0.166666666666666646259241729; + const __CLC_DOUBLEN sc2 = 0.833333333333095043065222816e-2; + const __CLC_DOUBLEN sc3 = -0.19841269836761125688538679e-3; + const __CLC_DOUBLEN sc4 = 0.275573161037288022676895908448e-5; + const __CLC_DOUBLEN sc5 = -0.25051132068021699772257377197e-7; + const __CLC_DOUBLEN sc6 = 0.159181443044859136852668200e-9; + + const __CLC_DOUBLEN cc1 = 0.41666666666666665390037e-1; + const __CLC_DOUBLEN cc2 = -0.13888888888887398280412e-2; + const __CLC_DOUBLEN cc3 = 0.248015872987670414957399e-4; + const __CLC_DOUBLEN cc4 = -0.275573172723441909470836e-6; + const __CLC_DOUBLEN cc5 = 0.208761463822329611076335e-8; + const __CLC_DOUBLEN cc6 = -0.113826398067944859590880e-10; + + __CLC_DOUBLEN x2 = x * x; + __CLC_DOUBLEN x3 = x2 * x; + __CLC_DOUBLEN r = (__CLC_DOUBLEN)0.5 * x2; + __CLC_DOUBLEN t = (__CLC_DOUBLEN)1.0 - r; + + __CLC_DOUBLEN sp = __clc_fma( + __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2); + + __CLC_DOUBLEN cp = + t + + __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5), + x2, cc4), + x2, cc3), + x2, cc2), + x2, cc1), + x2 * x2, __clc_fma(x, xx, (1.0 - t) - r)); + + *sinval = + x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx)); + *cosval = cp; +} + +_CLC_DEF _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx, + private __CLC_DOUBLEN *leadval, + private __CLC_DOUBLEN *tailval) { + // 0x3fe921fb54442d18 + const __CLC_DOUBLEN piby4_lead = 7.85398163397448278999e-01; + // 0x3c81a62633145c06 + const __CLC_DOUBLEN piby4_tail = 3.06161699786838240164e-17; + + // In order to maintain relative precision transform using the identity: + // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. + // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. + + __CLC_LONGN ca = x > 0.68; + __CLC_LONGN cb = x < -0.68; + __CLC_DOUBLEN transform = ca ? 1.0 : 0.0; + transform = cb ? 
-1.0 : transform; + + __CLC_DOUBLEN tx = __clc_fma(-transform, x, piby4_lead) + + __clc_fma(-transform, xx, piby4_tail); + __CLC_LONGN c = ca | cb; + x = c ? tx : x; + xx = c ? 0.0 : xx; + + // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68]. + __CLC_DOUBLEN t1 = x; + __CLC_DOUBLEN r = __clc_fma(2.0, x * xx, x * x); + + __CLC_DOUBLEN a = __clc_fma(r, + __clc_fma(r, 0.224044448537022097264602535574e-3, + -0.229345080057565662883358588111e-1), + 0.372379159759792203640806338901e0); + + __CLC_DOUBLEN b = + __clc_fma(r, + __clc_fma(r, + __clc_fma(r, -0.232371494088563558304549252913e-3, + 0.260656620398645407524064091208e-1), + -0.515658515729031149329237816945e0), + 0.111713747927937668539901657944e1); + + __CLC_DOUBLEN t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx); + + __CLC_DOUBLEN tp = t1 + t2; + + // Compute -1.0/(t1 + t2) accurately + __CLC_DOUBLEN z1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L); + __CLC_DOUBLEN z2 = t2 - (z1 - t1); + __CLC_DOUBLEN trec = -MATH_RECIP(tp); + __CLC_DOUBLEN trec_top = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L); + + __CLC_DOUBLEN tpr = __clc_fma( + __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top); + + __CLC_DOUBLEN tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp)); + __CLC_DOUBLEN tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0); + + *leadval = c ? tpt : tp; + *tailval = c ? 
tptr : tpr; +} + // Reduction for medium sized arguments _CLC_DEF _CLC_OVERLOAD void __clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r, diff --git a/libclc/clc/lib/generic/math/clc_sinpi.cl b/libclc/clc/lib/generic/math/clc_sinpi.cl index 6cff247707845..bb5de09f03c08 100644 --- a/libclc/clc/lib/generic/math/clc_sinpi.cl +++ b/libclc/clc/lib/generic/math/clc_sinpi.cl @@ -11,7 +11,6 @@ #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/math.h> #define __CLC_BODY <clc_sinpi.inc> diff --git a/libclc/clc/lib/generic/math/clc_tan.cl b/libclc/clc/lib/generic/math/clc_tan.cl index adf42c43d0484..7e68216ca43aa 100644 --- a/libclc/clc/lib/generic/math/clc_tan.cl +++ b/libclc/clc/lib/generic/math/clc_tan.cl @@ -11,7 +11,6 @@ #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/math.h> #include <clc/math/tables.h> #include <clc/relational/clc_isinf.h> diff --git a/libclc/clc/lib/generic/math/clc_tanpi.cl b/libclc/clc/lib/generic/math/clc_tanpi.cl index f1265892d107b..099457c186314 100644 --- a/libclc/clc/lib/generic/math/clc_tanpi.cl +++ b/libclc/clc/lib/generic/math/clc_tanpi.cl @@ -12,7 +12,6 @@ #include <clc/math/clc_fabs.h> #include <clc/math/clc_native_recip.h> #include <clc/math/clc_sincos_helpers.h> -#include <clc/math/clc_sincos_piby4.h> #include <clc/math/math.h> #define __CLC_BODY <clc_tanpi.inc> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
