Author: Fraser Cormack Date: 2025-03-27T10:59:09Z New Revision: 3284559cca4bc64e78e8243bb34195216e8979ee
URL: https://github.com/llvm/llvm-project/commit/3284559cca4bc64e78e8243bb34195216e8979ee DIFF: https://github.com/llvm/llvm-project/commit/3284559cca4bc64e78e8243bb34195216e8979ee.diff LOG: [libclc] Move atan2/atan2pi to the CLC library (#133226) As with other work in this area, these builtins are now vectorized. A further table has been split into two. There was discrepancy between comments above the table describing the values as "lead" and "tail" and variables taken from the table called "head" and "tail", so these have been unified as head/tail. Added: libclc/clc/include/clc/math/clc_atan2.h libclc/clc/include/clc/math/clc_atan2pi.h libclc/clc/lib/generic/math/clc_atan2.cl libclc/clc/lib/generic/math/clc_atan2.inc libclc/clc/lib/generic/math/clc_atan2pi.cl libclc/clc/lib/generic/math/clc_atan2pi.inc Modified: libclc/clc/include/clc/math/tables.h libclc/clc/lib/generic/SOURCES libclc/clc/lib/generic/math/clc_tables.cl libclc/generic/lib/math/atan2.cl libclc/generic/lib/math/atan2pi.cl libclc/generic/lib/math/tables.cl Removed: ################################################################################ diff --git a/libclc/clc/include/clc/math/clc_atan2.h b/libclc/clc/include/clc/math/clc_atan2.h new file mode 100644 index 0000000000000..85b99d0279ee7 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_atan2.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_ATAN2_H__ +#define __CLC_MATH_CLC_ATAN2_H__ + +#define __CLC_BODY <clc/shared/binary_decl.inc> +#define __CLC_FUNCTION __clc_atan2 + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_ATAN2_H__ diff --git a/libclc/clc/include/clc/math/clc_atan2pi.h b/libclc/clc/include/clc/math/clc_atan2pi.h new file mode 100644 index 0000000000000..af41165f7dcf2 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_atan2pi.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_ATAN2PI_H__ +#define __CLC_MATH_CLC_ATAN2PI_H__ + +#define __CLC_BODY <clc/shared/binary_decl.inc> +#define __CLC_FUNCTION __clc_atan2pi + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_ATAN2PI_H__ diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h index b801ba65bb945..6a0cd80f9c8cb 100644 --- a/libclc/clc/include/clc/math/tables.h +++ b/libclc/clc/include/clc/math/tables.h @@ -79,7 +79,8 @@ CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl); TABLE_FUNCTION_DECL(double2, ln_tbl); CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo); CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi); -TABLE_FUNCTION_DECL(double2, atan_jby256_tbl); +CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head); +CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail); TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl); TABLE_FUNCTION_DECL(double2, sinh_tbl); TABLE_FUNCTION_DECL(double2, 
cosh_tbl); diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index dd94d97303944..d851065bb2e23 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -24,6 +24,8 @@ math/clc_asin.cl math/clc_asinh.cl math/clc_asinpi.cl math/clc_atan.cl +math/clc_atan2.cl +math/clc_atan2pi.cl math/clc_atanh.cl math/clc_atanpi.cl math/clc_ceil.cl diff --git a/libclc/clc/lib/generic/math/clc_atan2.cl b/libclc/clc/lib/generic/math/clc_atan2.cl new file mode 100644 index 0000000000000..b10bf32333a32 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_atan2.cl @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_copysign.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_ldexp.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> +#include <clc/shared/clc_max.h> +#include <clc/shared/clc_min.h> + +#define __CLC_BODY <clc_atan2.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_atan2.inc b/libclc/clc/lib/generic/math/clc_atan2.inc new file mode 100644 index 0000000000000..61ffeebbc5d11 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_atan2.inc @@ -0,0 +1,248 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, + __CLC_GENTYPE x) { + const __CLC_GENTYPE pi = 0x1.921fb6p+1f; + const __CLC_GENTYPE piby2 = 0x1.921fb6p+0f; + const __CLC_GENTYPE piby4 = 0x1.921fb6p-1f; + const __CLC_GENTYPE threepiby4 = 0x1.2d97c8p+1f; + + __CLC_GENTYPE ax = __clc_fabs(x); + __CLC_GENTYPE ay = __clc_fabs(y); + __CLC_GENTYPE v = __clc_min(ax, ay); + __CLC_GENTYPE u = __clc_max(ax, ay); + + // Scale since u could be large, as in "regular" divide + __CLC_GENTYPE s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f; + __CLC_GENTYPE vbyu = s * MATH_DIVIDE(v, s * u); + + __CLC_GENTYPE vbyu2 = vbyu * vbyu; + +#define USE_2_2_APPROXIMATION +#if defined USE_2_2_APPROXIMATION + __CLC_GENTYPE p = + __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), + -0x1.5554d0p-2f) * + vbyu2 * vbyu; + __CLC_GENTYPE q = + __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f); +#else + __CLC_GENTYPE p = + __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f), + -0x1.55554ep-2f) * + vbyu2 * vbyu; + __CLC_GENTYPE q = __clc_mad( + vbyu2, + __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f), + 0x1.76b4b8p+0f), + 1.0f); +#endif + + // Octant 0 result + __CLC_GENTYPE a = __clc_mad(p, MATH_RECIP(q), vbyu); + + // Fix up 3 other octants + __CLC_GENTYPE at = piby2 - a; + a = ay > ax ? at : a; + at = pi - a; + a = x < 0.0F ? at : a; + + // y == 0 => 0 for x >= 0, pi for x < 0 + at = __CLC_AS_INTN(x) < 0 ? pi : 0.0f; + a = y == 0.0f ? at : a; + + // x and y are +- Inf + at = x > 0.0f ? 
piby4 : threepiby4; + a = __clc_select(a, at, __clc_isinf(x) && __clc_isinf(y)); + + // x or y is NaN + a = __clc_select(a, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isnan(y)); + + // Fixup sign and return + return __clc_copysign(a, y); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, + __CLC_GENTYPE x) { + const __CLC_GENTYPE pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */ + const __CLC_GENTYPE piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ + const __CLC_GENTYPE piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */ + // 0x4002d97c7f3321d2 + const __CLC_GENTYPE three_piby4 = 2.3561944901923449e+00; + const __CLC_GENTYPE pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */ + const __CLC_GENTYPE pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */ + // 0x3ff921fb54442d18 + const __CLC_GENTYPE piby2_head = 1.5707963267948965e+00; + // 0x3c91a62633145c07 + const __CLC_GENTYPE piby2_tail = 6.1232339957367660e-17; + + __CLC_GENTYPE x2 = x; + // Important to capture -0.0 in xneg and yneg, so comparison done as integer + __CLC_LONGN xneg = __CLC_AS_LONGN(x) < 0; + __CLC_INTN xexp = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x) >> EXPSHIFTBITS_DP64) & 0x7ff; + + __CLC_GENTYPE y2 = y; + __CLC_LONGN yneg = __CLC_AS_LONGN(y) < 0; + __CLC_INTN yexp = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y) >> EXPSHIFTBITS_DP64) & 0x7ff; + + __CLC_LONGN cond2 = __CLC_CONVERT_LONGN(xexp < 1021 && yexp < 1021); + __CLC_LONGN diffexp = __CLC_CONVERT_LONGN(yexp - xexp); + + // Scale up both x and y if they are both below 1/4 + __CLC_GENTYPE x1 = __clc_ldexp(x, 1024); + __CLC_INTN xexp1 = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x1) >> EXPSHIFTBITS_DP64) & 0x7ff; + __CLC_GENTYPE y1 = __clc_ldexp(y, 1024); + __CLC_INTN yexp1 = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y1) >> EXPSHIFTBITS_DP64) & 0x7ff; + __CLC_LONGN diffexp1 = __CLC_CONVERT_LONGN(yexp1 - xexp1); + + diffexp = __clc_select(diffexp, diffexp1, cond2); + x = cond2 ?
x1 : x; + y = cond2 ? y1 : y; + + // General case: take absolute values of arguments + __CLC_GENTYPE u = __clc_fabs(x); + __CLC_GENTYPE v = __clc_fabs(y); + + // Swap u and v if necessary to obtain 0 < v < u. Compute v/u. + __CLC_LONGN swap_vu = u < v; + __CLC_GENTYPE uu = u; + u = swap_vu ? v : u; + v = swap_vu ? uu : v; + + __CLC_GENTYPE vbyu = v / u; + __CLC_GENTYPE q1, q2; + + // General values of v/u. Use a look-up table and series expansion. + + { + __CLC_GENTYPE val = vbyu > 0.0625 ? vbyu : 0.063; + __CLC_INTN index = __CLC_CONVERT_INTN(__clc_fma(256.0, val, 0.5)); + q1 = USE_TABLE(atan_jby256_tbl_head, index - 16); + q2 = USE_TABLE(atan_jby256_tbl_tail, index - 16); + __CLC_GENTYPE c = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-8; + + // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1 + // u_exponent could be EMAX so we have to do it in 2 steps + __CLC_INTN m = + -(__CLC_CONVERT_INTN(__CLC_AS_ULONGN(u) >> EXPSHIFTBITS_DP64) - + EXPBIAS_DP64); + __CLC_GENTYPE um = __clc_ldexp(u, m); + __CLC_GENTYPE vm = __clc_ldexp(v, m); + + // 26 leading bits of u + __CLC_GENTYPE u1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL); + __CLC_GENTYPE u2 = um - u1; + + __CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)), + __clc_fma(c, vm, um)); + + // Polynomial approximation to atan(r) + __CLC_GENTYPE s = r * r; + q2 = q2 + __clc_fma((s * __clc_fma(-s, 0.19999918038989143496, + 0.33333333333224095522)), + -r, r); + } + + __CLC_GENTYPE q3, q4; + { + q3 = 0.0; + q4 = vbyu; + } + + __CLC_GENTYPE q5, q6; + { + __CLC_GENTYPE u1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL); + __CLC_GENTYPE u2 = u - u1; + __CLC_GENTYPE vu1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL); + __CLC_GENTYPE vu2 = vbyu - vu1; + + q5 = 0.0; + __CLC_GENTYPE s = vbyu * vbyu; + q6 = vbyu + + __clc_fma( + -vbyu * s, + __clc_fma( + -s, + __clc_fma(-s, + __clc_fma(-s, + __clc_fma(-s, 0.90029810285449784439E-01, + 
0.11110736283514525407), + 0.14285713561807169030), + 0.19999999999393223405), + 0.33333333333333170500), + MATH_DIVIDE(__clc_fma(-u, vu2, + __clc_fma(-u2, vu1, __clc_fma(-u1, vu1, v))), + u)); + } + + q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5; + q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6; + + q1 = vbyu > 0.0625 ? q1 : q3; + q2 = vbyu > 0.0625 ? q2 : q4; + + // Tidy-up according to which quadrant the arguments lie in + __CLC_GENTYPE res1, res2, res3, res4; + q1 = swap_vu ? piby2_head - q1 : q1; + q2 = swap_vu ? piby2_tail - q2 : q2; + q1 = xneg ? pi_head - q1 : q1; + q2 = xneg ? pi_tail - q2 : q2; + q1 = q1 + q2; + res4 = yneg ? -q1 : q1; + + res1 = yneg ? -three_piby4 : three_piby4; + res2 = yneg ? -piby4 : piby4; + res3 = xneg ? res1 : res2; + + res3 = __clc_select(res4, res3, + __CLC_CONVERT_LONGN(__clc_isinf(x2) && __clc_isinf(y2))); + res1 = yneg ? -pi : pi; + + // abs(x)/abs(y) > 2^56 and x < 0 + res3 = (diffexp < -56 && xneg) ? res1 : res3; + + res4 = MATH_DIVIDE(y, x); + // x positive and dominant over y by a factor of 2^28 + res3 = diffexp < -28 && xneg == 0 ? res4 : res3; + + // abs(y)/abs(x) > 2^56 + res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2 + res3 = diffexp > 56 ? res4 : res3; + + res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y + res4 = xneg ? res1 : y2; + + // Zero y gives +-0 for positive x and +-pi for negative x + res3 = y2 == 0.0 ? res4 : res3; + res3 = __clc_isnan(y2) ? y2 : res3; + res3 = __clc_isnan(x2) ?
x2 : res3; + + return res3; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return __CLC_CONVERT_GENTYPE( + __clc_atan2(__CLC_CONVERT_FLOATN(x), __CLC_CONVERT_FLOATN(y))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_atan2pi.cl b/libclc/clc/lib/generic/math/clc_atan2pi.cl new file mode 100644 index 0000000000000..7f75c11d15d7b --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_atan2pi.cl @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/float/definitions.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_copysign.h> +#include <clc/math/clc_fabs.h> +#include <clc/math/clc_fma.h> +#include <clc/math/clc_ldexp.h> +#include <clc/math/clc_mad.h> +#include <clc/math/math.h> +#include <clc/math/tables.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_isnan.h> +#include <clc/relational/clc_select.h> +#include <clc/shared/clc_max.h> +#include <clc/shared/clc_min.h> + +#define __CLC_BODY <clc_atan2pi.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_atan2pi.inc b/libclc/clc/lib/generic/math/clc_atan2pi.inc new file mode 100644 index 0000000000000..79b2551e077cc --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_atan2pi.inc @@ -0,0 +1,227 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE y, + __CLC_GENTYPE x) { + const __CLC_GENTYPE pi = 0x1.921fb6p+1f; + + __CLC_GENTYPE ax = __clc_fabs(x); + __CLC_GENTYPE ay = __clc_fabs(y); + __CLC_GENTYPE v = __clc_min(ax, ay); + __CLC_GENTYPE u = __clc_max(ax, ay); + + // Scale since u could be large, as in "regular" divide + __CLC_GENTYPE s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f; + __CLC_GENTYPE vbyu = s * MATH_DIVIDE(v, s * u); + + __CLC_GENTYPE vbyu2 = vbyu * vbyu; + + __CLC_GENTYPE p = + __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), + -0x1.5554d0p-2f) * + vbyu2 * vbyu; + __CLC_GENTYPE q = + __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f); + + // Octant 0 result + __CLC_GENTYPE a = MATH_DIVIDE(__clc_mad(p, MATH_RECIP(q), vbyu), pi); + + // Fix up 3 other octants + __CLC_GENTYPE at = 0.5f - a; + a = ay > ax ? at : a; + at = 1.0f - a; + a = x < 0.0F ? at : a; + + // y == 0 => 0 for x >= 0, pi for x < 0 + at = __CLC_AS_INTN(x) < 0 ? 1.0f : 0.0f; + a = y == 0.0f ? at : a; + + // x and y are +- Inf + at = x > 0.0f ? 
0.25f : 0.75f; + a = __clc_select(a, at, __clc_isinf(x) && __clc_isinf(y)); + + // x or y is NaN + a = __clc_select(a, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isnan(y)); + + // Fixup sign and return + return __clc_copysign(a, y); +} + +#elif __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE y, + __CLC_GENTYPE x) { + const __CLC_GENTYPE pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */ + const __CLC_GENTYPE pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */ + const __CLC_GENTYPE pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */ + // 0x3ff921fb54442d18 + const __CLC_GENTYPE piby2_head = 1.5707963267948965e+00; + // 0x3c91a62633145c07 + const __CLC_GENTYPE piby2_tail = 6.1232339957367660e-17; + + __CLC_GENTYPE x2 = x; + __CLC_LONGN xneg = __CLC_AS_LONGN(x) < 0; + __CLC_INTN xexp = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x) >> EXPSHIFTBITS_DP64) & 0x7ff; + + __CLC_GENTYPE y2 = y; + __CLC_LONGN yneg = __CLC_AS_LONGN(y) < 0; + __CLC_INTN yexp = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y) >> EXPSHIFTBITS_DP64) & 0x7ff; + + __CLC_LONGN cond2 = __CLC_CONVERT_LONGN(xexp < 1021 & yexp < 1021); + __CLC_LONGN diffexp = __CLC_CONVERT_LONGN(yexp - xexp); + + // Scale up both x and y if they are both below 1/4 + __CLC_GENTYPE x1 = __clc_ldexp(x, 1024); + __CLC_INTN xexp1 = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x1) >> EXPSHIFTBITS_DP64) & 0x7ff; + __CLC_GENTYPE y1 = __clc_ldexp(y, 1024); + __CLC_INTN yexp1 = + __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y1) >> EXPSHIFTBITS_DP64) & 0x7ff; + __CLC_LONGN diffexp1 = __CLC_CONVERT_LONGN(yexp1 - xexp1); + + diffexp = __clc_select(diffexp, diffexp1, cond2); + x = cond2 ? x1 : x; + y = cond2 ? y1 : y; + + // General case: take absolute values of arguments + __CLC_GENTYPE u = __clc_fabs(x); + __CLC_GENTYPE v = __clc_fabs(y); + + // Swap u and v if necessary to obtain 0 < v < u. Compute v/u. + __CLC_LONGN swap_vu = u < v; + __CLC_GENTYPE uu = u; + u = swap_vu ? v : u; + v = swap_vu ?
uu : v; + + __CLC_GENTYPE vbyu = v / u; + __CLC_GENTYPE q1, q2; + + // General values of v/u. Use a look-up table and series expansion. + + { + __CLC_GENTYPE val = vbyu > 0.0625 ? vbyu : 0.063; + __CLC_INTN index = __CLC_CONVERT_INTN(__clc_fma(256.0, val, 0.5)); + q1 = USE_TABLE(atan_jby256_tbl_head, (index - 16)); + q2 = USE_TABLE(atan_jby256_tbl_tail, (index - 16)); + __CLC_GENTYPE c = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-8; + + // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1 + // u_exponent could be EMAX so we have to do it in 2 steps + __CLC_INTN m = + -(__CLC_CONVERT_INTN(__CLC_AS_ULONGN(u) >> EXPSHIFTBITS_DP64) - + EXPBIAS_DP64); + __CLC_GENTYPE um = __clc_ldexp(u, m); + __CLC_GENTYPE vm = __clc_ldexp(v, m); + + // 26 leading bits of u + __CLC_GENTYPE u1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL); + __CLC_GENTYPE u2 = um - u1; + + __CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)), + __clc_fma(c, vm, um)); + + // Polynomial approximation to atan(r) + __CLC_GENTYPE s = r * r; + q2 = q2 + __clc_fma((s * __clc_fma(-s, 0.19999918038989143496, + 0.33333333333224095522)), + -r, r); + } + + __CLC_GENTYPE q3, q4; + { + q3 = 0.0; + q4 = vbyu; + } + + __CLC_GENTYPE q5, q6; + { + __CLC_GENTYPE u1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL); + __CLC_GENTYPE u2 = u - u1; + __CLC_GENTYPE vu1 = + __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL); + __CLC_GENTYPE vu2 = vbyu - vu1; + + q5 = 0.0; + __CLC_GENTYPE s = vbyu * vbyu; + q6 = vbyu + + __clc_fma( + -vbyu * s, + __clc_fma( + -s, + __clc_fma(-s, + __clc_fma(-s, + __clc_fma(-s, 0.90029810285449784439E-01, + 0.11110736283514525407), + 0.14285713561807169030), + 0.19999999999393223405), + 0.33333333333333170500), + MATH_DIVIDE(__clc_fma(-u, vu2, + __clc_fma(-u2, vu1, __clc_fma(-u1, vu1, v))), + u)); + } + + q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5; + q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? 
q4 : q6; + + q1 = vbyu > 0.0625 ? q1 : q3; + q2 = vbyu > 0.0625 ? q2 : q4; + + // Tidy-up according to which quadrant the arguments lie in + __CLC_GENTYPE res1, res2, res3, res4; + q1 = swap_vu ? piby2_head - q1 : q1; + q2 = swap_vu ? piby2_tail - q2 : q2; + q1 = xneg ? pi_head - q1 : q1; + q2 = xneg ? pi_tail - q2 : q2; + q1 = MATH_DIVIDE(q1 + q2, pi); + res4 = yneg ? -q1 : q1; + + res1 = yneg ? -0.75 : 0.75; + res2 = yneg ? -0.25 : 0.25; + res3 = xneg ? res1 : res2; + + res3 = __clc_select(res4, res3, + __CLC_CONVERT_LONGN(__clc_isinf(y2) & __clc_isinf(x2))); + res1 = yneg ? -1.0 : 1.0; + + // abs(x)/abs(y) > 2^56 and x < 0 + res3 = diffexp < -56 && xneg ? res1 : res3; + + res4 = MATH_DIVIDE(MATH_DIVIDE(y, x), pi); + // x positive and dominant over y by a factor of 2^28 + res3 = diffexp < -28 && xneg == 0 ? res4 : res3; + + // abs(y)/abs(x) > 2^56 + res4 = yneg ? -0.5 : 0.5; // atan(y/x) is insignificant compared to piby2 + res3 = diffexp > 56 ? res4 : res3; + + res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y + res4 = xneg ? res1 : y2; + + // Zero y gives +-0 for positive x and +-pi for negative x + res3 = y2 == 0.0 ? res4 : res3; + res3 = __clc_isnan(y2) ? y2 : res3; + res3 = __clc_isnan(x2) ?
x2 : res3; + + return res3; +} + +#elif __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return __CLC_CONVERT_GENTYPE( + __clc_atan2pi(__CLC_CONVERT_FLOATN(x), __CLC_CONVERT_FLOATN(y))); +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl index 7ad005f91b575..2d6d280f7ea06 100644 --- a/libclc/clc/lib/generic/math/clc_tables.cl +++ b/libclc/clc/lib/generic/math/clc_tables.cl @@ -180,4 +180,181 @@ DECLARE_TABLE(double, LN_TBL_HI, 65) = { CLC_TABLE_FUNCTION(double, LN_TBL_HI, ln_tbl_hi); +// Arrays atan_jby256_head and atan_jby256_tail contain leading and trailing +// parts respectively of precomputed values of atan(j/256), for j = 16, 17, ..., +// 256. atan_jby256_head contains the first 21 bits of precision, and +// atan_jby256_tail contains a further 53 bits precision. + +DECLARE_TABLE(double, ATAN_JBY256_TBL_HEAD, 241) = { + 0x1.ff55b00000000p-5, 0x1.0f99e00000000p-4, 0x1.1f86d00000000p-4, + 0x1.2f71900000000p-4, 0x1.3f59f00000000p-4, 0x1.4f3fd00000000p-4, + 0x1.5f23200000000p-4, 0x1.6f03b00000000p-4, 0x1.7ee1800000000p-4, + 0x1.8ebc500000000p-4, 0x1.9e94100000000p-4, 0x1.ae68a00000000p-4, + 0x1.be39e00000000p-4, 0x1.ce07c00000000p-4, 0x1.ddd2100000000p-4, + 0x1.ed98c00000000p-4, 0x1.fd5ba00000000p-4, 0x1.068d500000000p-3, + 0x1.0e6ad00000000p-3, 0x1.1646500000000p-3, 0x1.1e1fa00000000p-3, + 0x1.25f6e00000000p-3, 0x1.2dcbd00000000p-3, 0x1.359e800000000p-3, + 0x1.3d6ee00000000p-3, 0x1.453ce00000000p-3, 0x1.4d08700000000p-3, + 0x1.54d1800000000p-3, 0x1.5c98100000000p-3, 0x1.645bf00000000p-3, + 0x1.6c1d400000000p-3, 0x1.73dbd00000000p-3, 0x1.7b97b00000000p-3, + 0x1.8350b00000000p-3, 0x1.8b06e00000000p-3, 0x1.92ba300000000p-3, + 0x1.9a6a800000000p-3, 0x1.a217e00000000p-3, 0x1.a9c2300000000p-3, + 0x1.b169600000000p-3, 0x1.b90d700000000p-3, 0x1.c0ae500000000p-3, + 0x1.c84bf00000000p-3, 0x1.cfe6500000000p-3, 0x1.d77d500000000p-3, + 
0x1.df11000000000p-3, 0x1.e6a1400000000p-3, 0x1.ee2e100000000p-3, + 0x1.f5b7500000000p-3, 0x1.fd3d100000000p-3, 0x1.025fa00000000p-2, + 0x1.061ee00000000p-2, 0x1.09dc500000000p-2, 0x1.0d97e00000000p-2, + 0x1.1151a00000000p-2, 0x1.1509700000000p-2, 0x1.18bf500000000p-2, + 0x1.1c73500000000p-2, 0x1.2025500000000p-2, 0x1.23d5600000000p-2, + 0x1.2783700000000p-2, 0x1.2b2f700000000p-2, 0x1.2ed9800000000p-2, + 0x1.3281800000000p-2, 0x1.3627700000000p-2, 0x1.39cb400000000p-2, + 0x1.3d6d100000000p-2, 0x1.410cb00000000p-2, 0x1.44aa400000000p-2, + 0x1.4845a00000000p-2, 0x1.4bdee00000000p-2, 0x1.4f75f00000000p-2, + 0x1.530ad00000000p-2, 0x1.569d800000000p-2, 0x1.5a2e000000000p-2, + 0x1.5dbc300000000p-2, 0x1.6148400000000p-2, 0x1.64d1f00000000p-2, + 0x1.6859700000000p-2, 0x1.6bdea00000000p-2, 0x1.6f61900000000p-2, + 0x1.72e2200000000p-2, 0x1.7660700000000p-2, 0x1.79dc600000000p-2, + 0x1.7d56000000000p-2, 0x1.80cd400000000p-2, 0x1.8442200000000p-2, + 0x1.87b4b00000000p-2, 0x1.8b24d00000000p-2, 0x1.8e92900000000p-2, + 0x1.91fde00000000p-2, 0x1.9566d00000000p-2, 0x1.98cd500000000p-2, + 0x1.9c31600000000p-2, 0x1.9f93000000000p-2, 0x1.a2f2300000000p-2, + 0x1.a64ee00000000p-2, 0x1.a9a9200000000p-2, 0x1.ad00f00000000p-2, + 0x1.b056400000000p-2, 0x1.b3a9100000000p-2, 0x1.b6f9600000000p-2, + 0x1.ba47300000000p-2, 0x1.bd92800000000p-2, 0x1.c0db400000000p-2, + 0x1.c421900000000p-2, 0x1.c765500000000p-2, 0x1.caa6800000000p-2, + 0x1.cde5300000000p-2, 0x1.d121500000000p-2, 0x1.d45ae00000000p-2, + 0x1.d791f00000000p-2, 0x1.dac6700000000p-2, 0x1.ddf8500000000p-2, + 0x1.e127b00000000p-2, 0x1.e454800000000p-2, 0x1.e77eb00000000p-2, + 0x1.eaa6500000000p-2, 0x1.edcb600000000p-2, 0x1.f0ede00000000p-2, + 0x1.f40dd00000000p-2, 0x1.f72b200000000p-2, 0x1.fa45d00000000p-2, + 0x1.fd5e000000000p-2, 0x1.0039c00000000p-1, 0x1.01c3400000000p-1, + 0x1.034b700000000p-1, 0x1.04d2500000000p-1, 0x1.0657e00000000p-1, + 0x1.07dc300000000p-1, 0x1.095f300000000p-1, 0x1.0ae0e00000000p-1, + 0x1.0c61400000000p-1, 
0x1.0de0500000000p-1, 0x1.0f5e200000000p-1, + 0x1.10daa00000000p-1, 0x1.1255d00000000p-1, 0x1.13cfb00000000p-1, + 0x1.1548500000000p-1, 0x1.16bfa00000000p-1, 0x1.1835a00000000p-1, + 0x1.19aa500000000p-1, 0x1.1b1dc00000000p-1, 0x1.1c8fe00000000p-1, + 0x1.1e00b00000000p-1, 0x1.1f70400000000p-1, 0x1.20de800000000p-1, + 0x1.224b700000000p-1, 0x1.23b7100000000p-1, 0x1.2521700000000p-1, + 0x1.268a900000000p-1, 0x1.27f2600000000p-1, 0x1.2958e00000000p-1, + 0x1.2abe200000000p-1, 0x1.2c22100000000p-1, 0x1.2d84c00000000p-1, + 0x1.2ee6200000000p-1, 0x1.3046400000000p-1, 0x1.31a5200000000p-1, + 0x1.3302b00000000p-1, 0x1.345f000000000p-1, 0x1.35ba000000000p-1, + 0x1.3713d00000000p-1, 0x1.386c500000000p-1, 0x1.39c3900000000p-1, + 0x1.3b19800000000p-1, 0x1.3c6e400000000p-1, 0x1.3dc1c00000000p-1, + 0x1.3f13f00000000p-1, 0x1.4064f00000000p-1, 0x1.41b4a00000000p-1, + 0x1.4303200000000p-1, 0x1.4450600000000p-1, 0x1.459c600000000p-1, + 0x1.46e7200000000p-1, 0x1.4830a00000000p-1, 0x1.4978f00000000p-1, + 0x1.4ac0000000000p-1, 0x1.4c05e00000000p-1, 0x1.4d4a800000000p-1, + 0x1.4e8de00000000p-1, 0x1.4fd0100000000p-1, 0x1.5111000000000p-1, + 0x1.5250c00000000p-1, 0x1.538f500000000p-1, 0x1.54cca00000000p-1, + 0x1.5608d00000000p-1, 0x1.5743c00000000p-1, 0x1.587d800000000p-1, + 0x1.59b6000000000p-1, 0x1.5aed600000000p-1, 0x1.5c23900000000p-1, + 0x1.5d58900000000p-1, 0x1.5e8c600000000p-1, 0x1.5fbf000000000p-1, + 0x1.60f0800000000p-1, 0x1.6220d00000000p-1, 0x1.634ff00000000p-1, + 0x1.647de00000000p-1, 0x1.65aab00000000p-1, 0x1.66d6600000000p-1, + 0x1.6800e00000000p-1, 0x1.692a400000000p-1, 0x1.6a52700000000p-1, + 0x1.6b79800000000p-1, 0x1.6c9f700000000p-1, 0x1.6dc4400000000p-1, + 0x1.6ee7f00000000p-1, 0x1.700a700000000p-1, 0x1.712be00000000p-1, + 0x1.724c300000000p-1, 0x1.736b600000000p-1, 0x1.7489700000000p-1, + 0x1.75a6700000000p-1, 0x1.76c2400000000p-1, 0x1.77dd100000000p-1, + 0x1.78f6b00000000p-1, 0x1.7a0f400000000p-1, 0x1.7b26c00000000p-1, + 0x1.7c3d300000000p-1, 0x1.7d52800000000p-1, 
0x1.7e66c00000000p-1, + 0x1.7f79e00000000p-1, 0x1.808c000000000p-1, 0x1.819d000000000p-1, + 0x1.82ad000000000p-1, 0x1.83bbe00000000p-1, 0x1.84c9c00000000p-1, + 0x1.85d6900000000p-1, 0x1.86e2500000000p-1, 0x1.87ed000000000p-1, + 0x1.88f6b00000000p-1, 0x1.89ff500000000p-1, 0x1.8b06f00000000p-1, + 0x1.8c0d900000000p-1, 0x1.8d13200000000p-1, 0x1.8e17a00000000p-1, + 0x1.8f1b300000000p-1, 0x1.901db00000000p-1, 0x1.911f300000000p-1, + 0x1.921fb00000000p-1, +}; + +CLC_TABLE_FUNCTION(double, ATAN_JBY256_TBL_HEAD, atan_jby256_tbl_head); + +DECLARE_TABLE(double, ATAN_JBY256_TBL_TAIL, 241) = { + 0x1.6e59fbd38db2cp-26, 0x1.4e3aa54dedf96p-25, 0x1.7e105ab1bda88p-25, + 0x1.8c5254d013fd0p-27, 0x1.cf8ab3ad62670p-29, 0x1.9dca4bec80468p-26, + 0x1.3f4b5ec98a8dap-26, 0x1.b9d49619d81fep-25, 0x1.3017887460934p-27, + 0x1.11e3eca0b9944p-26, 0x1.4f3f73c5a332ep-26, 0x1.c71c8ae0e00a6p-26, + 0x1.7cde0f86fbdc7p-25, 0x1.70f328c889c72p-26, 0x1.c07ae9b994efep-26, + 0x1.0c8021d7b1698p-27, 0x1.35585edb8cb22p-25, 0x1.0842567b30e96p-24, + 0x1.99e811031472ep-24, 0x1.041821416bceep-25, 0x1.f6086e4dc96f4p-24, + 0x1.71a535c5f1b58p-27, 0x1.65f743fe63ca1p-24, 0x1.dbd733472d014p-24, + 0x1.d18cc4d8b0d1dp-24, 0x1.8c12553c8fb29p-24, 0x1.53b49e2e8f991p-24, + 0x1.7422ae148c141p-24, 0x1.e3ec269df56a8p-27, 0x1.ff6754e7e0ac9p-24, + 0x1.131267b1b5aadp-24, 0x1.d14fa403a94bcp-24, 0x1.2f396c089a3d8p-25, + 0x1.c731d78fa95bbp-24, 0x1.c50f385177399p-24, 0x1.f41409c6f2c20p-25, + 0x1.d2d90c4c39ec0p-24, 0x1.80420696f2106p-25, 0x1.b40327943a2e8p-27, + 0x1.5d35e02f3d2a2p-25, 0x1.4a498288117b0p-25, 0x1.35da119afb324p-25, + 0x1.14e85cdb9a908p-24, 0x1.38754e5547b9ap-25, 0x1.be40ae6ce3246p-24, + 0x1.0c993b3bea7e7p-24, 0x1.1d2dd89ac3359p-24, 0x1.1476603332c46p-25, + 0x1.f25901bac55b7p-24, 0x1.f881b7c826e28p-24, 0x1.441996d698d20p-24, + 0x1.407ac521ea089p-23, 0x1.2fb0c6c4b1723p-23, 0x1.ca135966a3e18p-23, + 0x1.b1218e4d646e4p-25, 0x1.d4e72a350d288p-25, 0x1.4617e2f04c329p-23, + 0x1.096ec41e82650p-25, 0x1.9f91f25773e6ep-24, 
0x1.59c0820f1d674p-25, + 0x1.02bf7a2df1064p-25, 0x1.fb36bfc40508fp-23, 0x1.ea08f3f8dc892p-24, + 0x1.3ed6254656a0ep-24, 0x1.b83f5e5e69c58p-25, 0x1.d6ec2af768592p-23, + 0x1.493889a226f94p-25, 0x1.5ad8fa65279bap-23, 0x1.b615784d45434p-25, + 0x1.09a184368f145p-23, 0x1.61a2439b0d91cp-24, 0x1.ce1a65e39a978p-24, + 0x1.32a39a93b6a66p-23, 0x1.1c3699af804e7p-23, 0x1.75e0f4e44ede8p-26, + 0x1.f77ced1a7a83bp-23, 0x1.84e7f0cb1b500p-29, 0x1.ec6b838b02dfep-23, + 0x1.3ebf4dfbeda87p-23, 0x1.9397aed9cb475p-23, 0x1.07937bc239c54p-24, + 0x1.aa754553131b6p-23, 0x1.4a05d407c45dcp-24, 0x1.132231a206dd0p-23, + 0x1.2d8ecfdd69c88p-24, 0x1.a852c74218606p-24, 0x1.71bf2baeebb50p-23, + 0x1.83d7db7491820p-27, 0x1.ca50d92b6da14p-25, 0x1.6f5cde8530298p-26, + 0x1.f343198910740p-24, 0x1.0e8d241ccd80ap-24, 0x1.1535ac619e6c8p-24, + 0x1.7316041c36cd2p-24, 0x1.985a000637d8ep-24, 0x1.f2f29858c0a68p-25, + 0x1.879847f96d909p-23, 0x1.ab3d319e12e42p-23, 0x1.5088162dfc4c2p-24, + 0x1.05749a1cd9d8cp-25, 0x1.da65c6c6b8618p-26, 0x1.739bf7df1ad64p-25, + 0x1.bc31252aa3340p-25, 0x1.e528191ad3aa8p-26, 0x1.929d93df19f18p-23, + 0x1.ff11eb693a080p-26, 0x1.55ae3f145a3a0p-27, 0x1.cbcd8c6c0ca82p-24, + 0x1.0cb04d425d304p-24, 0x1.9adfcab5be678p-24, 0x1.93d90c5662508p-23, + 0x1.68489bd35ff40p-24, 0x1.586ed3da2b7e0p-28, 0x1.7604d2e850eeep-23, + 0x1.ac1d12bfb53d8p-24, 0x1.9b3d468274740p-28, 0x1.fc5d68d10e53cp-24, + 0x1.8f9e51884becbp-23, 0x1.a87f0869c06d1p-23, 0x1.31e7279f685fap-23, + 0x1.6a8282f9719b0p-27, 0x1.0d2724a8a44e0p-25, 0x1.a60524b11ad4ep-23, + 0x1.75fdf832750f0p-26, 0x1.cf06902e4cd36p-23, 0x1.e82422d4f6d10p-25, + 0x1.24a091063e6c0p-26, 0x1.8a1a172dc6f38p-24, 0x1.29b6619f8a92dp-22, + 0x1.9274d9c1b70c8p-24, 0x1.0c34b1fbb7930p-26, 0x1.639866c20eb50p-25, + 0x1.6d6d0f6832e9ep-23, 0x1.af54def99f25ep-22, 0x1.16cfc52a00262p-22, + 0x1.dcc1e83569c32p-23, 0x1.37f7a551ed425p-22, 0x1.f6360adc98887p-22, + 0x1.2c6ec8d35a2c1p-22, 0x1.bd44df84cb036p-23, 0x1.117cf826e310ep-22, + 0x1.ca533f332cfc9p-22, 0x1.0f208509dbc2ep-22, 
0x1.cd07d93c945dep-23, + 0x1.57bdfd67e6d72p-22, 0x1.aab89c516c658p-24, 0x1.3e823b1a1b8a0p-25, + 0x1.307464a9d6d3cp-23, 0x1.c5993cd438843p-22, 0x1.ba2fca02ab554p-22, + 0x1.01a5b6983a268p-23, 0x1.273d1b350efc8p-25, 0x1.64c238c37b0c6p-23, + 0x1.aded07370a300p-25, 0x1.78091197eb47ep-23, 0x1.4b0f245e0dabcp-24, + 0x1.080d9794e2eafp-22, 0x1.d4ec242b60c76p-23, 0x1.221d2f940caa0p-27, + 0x1.cdbc42b2bba5cp-24, 0x1.cce37bb440840p-25, 0x1.6c1d999cf1dd0p-22, + 0x1.bed8a07eb0870p-26, 0x1.69ed88f490e3cp-24, 0x1.cd41719b73ef0p-25, + 0x1.cbc4ac95b41b7p-22, 0x1.238f1b890f5d7p-22, 0x1.50c4282259cc4p-24, + 0x1.713d2de87b3e2p-22, 0x1.1d5a7d2255276p-23, 0x1.c0dfd48227ac1p-22, + 0x1.1c964dab76753p-22, 0x1.6de56d5704496p-23, 0x1.4aeb71fd19968p-23, + 0x1.fbf91c57b1918p-23, 0x1.d6bef7fbe5d9ap-22, 0x1.464d3dc249066p-22, + 0x1.638e2ec4d9073p-22, 0x1.16f4a7247ea7cp-24, 0x1.1a0a740f1d440p-28, + 0x1.6edbb0114a33cp-23, 0x1.dbee8bf1d513cp-24, 0x1.5b8bdb0248f73p-22, + 0x1.7de3d3f5eac64p-22, 0x1.ee24187ae448ap-23, 0x1.e06c591ec5192p-22, + 0x1.4e3861a332738p-24, 0x1.a9599dcc2bfe4p-24, 0x1.f732fbad43468p-25, + 0x1.eb9f573b727d9p-22, 0x1.8b212a2eb9897p-22, 0x1.384884c167215p-22, + 0x1.0e2d363020051p-22, 0x1.2820879fbd022p-22, 0x1.a1ab9893e4b30p-22, + 0x1.2d1b817a24478p-23, 0x1.15d7b8ded4878p-25, 0x1.8968f9db3a5e4p-24, + 0x1.71c4171fe135fp-22, 0x1.6d80f605d0d8cp-22, 0x1.c91f043691590p-24, + 0x1.39f8a15fce2b2p-23, 0x1.55beda9d94b80p-27, 0x1.b12c15d60949ap-23, + 0x1.24167b312bfe3p-22, 0x1.0ab8633070277p-22, 0x1.54554ebbc80eep-23, + 0x1.0204aef5a4bb8p-25, 0x1.8af08c679cf2cp-22, 0x1.0852a330ae6c8p-22, + 0x1.6d3eb9ec32916p-23, 0x1.685cb7fcbbafep-23, 0x1.1f751c1e0bd95p-22, + 0x1.705b1b0f72560p-26, 0x1.b98d8d808ca92p-22, 0x1.2ea22c75cc980p-25, + 0x1.7aba62bca0350p-22, 0x1.d73833442278cp-22, 0x1.5a5ca1fb18bf9p-22, + 0x1.1a6092b6ecf28p-25, 0x1.44fd049aac104p-24, 0x1.c114fd8df5180p-29, + 0x1.5972f130feae5p-22, 0x1.ca034a55fe198p-24, 0x1.6e2b149990227p-22, + 0x1.b00000294592cp-24, 0x1.8b9bdc442620ep-22, 
0x1.d94fdfabf3e4ep-23, + 0x1.5db30b145ad9ap-23, 0x1.e3e1eb95022b0p-23, 0x1.d5b8b45442bd6p-22, + 0x1.7a046231ecd2ep-22, 0x1.feafe3ef55232p-22, 0x1.839e7bfd78267p-22, + 0x1.45cf49d6fa900p-25, 0x1.be3132b27f380p-27, 0x1.533980bb84f9fp-22, + 0x1.889e2ce3ba390p-26, 0x1.f7778c3ad0cc8p-24, 0x1.46660cec4eba2p-23, + 0x1.5110b4611a626p-23, +}; + +CLC_TABLE_FUNCTION(double, ATAN_JBY256_TBL_TAIL, atan_jby256_tbl_tail); + #endif // cl_khr_fp64 diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl index 635f1cdfaca7e..20651c1ec1bf3 100644 --- a/libclc/generic/lib/math/atan2.cl +++ b/libclc/generic/lib/math/atan2.cl @@ -8,223 +8,9 @@ #include <clc/clc.h> #include <clc/clcmacro.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> +#include <clc/math/clc_atan2.h> -_CLC_OVERLOAD _CLC_DEF float atan2(float y, float x) -{ - const float pi = 0x1.921fb6p+1f; - const float piby2 = 0x1.921fb6p+0f; - const float piby4 = 0x1.921fb6p-1f; - const float threepiby4 = 0x1.2d97c8p+1f; +#define FUNCTION atan2 +#define __CLC_BODY <clc/shared/binary_def.inc> - float ax = fabs(x); - float ay = fabs(y); - float v = min(ax, ay); - float u = max(ax, ay); - - // Scale since u could be large, as in "regular" divide - float s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f; - float vbyu = s * MATH_DIVIDE(v, s*u); - - float vbyu2 = vbyu * vbyu; - -#define USE_2_2_APPROXIMATION -#if defined USE_2_2_APPROXIMATION - float p = mad(vbyu2, mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * vbyu2 * vbyu; - float q = mad(vbyu2, mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f); -#else - float p = mad(vbyu2, mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f), -0x1.55554ep-2f) * vbyu2 * vbyu; - float q = mad(vbyu2, mad(vbyu2, mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f), 0x1.76b4b8p+0f), 1.0f); -#endif - - // Octant 0 result - float a = mad(p, MATH_RECIP(q), vbyu); - - // Fix up 3 other octants - float at = piby2 - a; - a = ay > ax ? at : a; - at = pi - a; - a = x < 0.0F ? 
at : a; - - // y == 0 => 0 for x >= 0, pi for x < 0 - at = as_int(x) < 0 ? pi : 0.0f; - a = y == 0.0f ? at : a; - - // if (!FINITE_ONLY()) { - // x and y are +- Inf - at = x > 0.0f ? piby4 : threepiby4; - a = ax == INFINITY & ay == INFINITY ? at : a; - - // x or y is NaN - a = isnan(x) | isnan(y) ? as_float(QNANBITPATT_SP32) : a; - // } - - // Fixup sign and return - return copysign(a, y); -} - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2, float, float); - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double atan2(double y, double x) -{ - const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */ - const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ - const double piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */ - const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */ - const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */ - const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */ - const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */ - const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */ - - double x2 = x; - int xneg = as_int2(x).hi < 0; - int xexp = (as_int2(x).hi >> 20) & 0x7ff; - - double y2 = y; - int yneg = as_int2(y).hi < 0; - int yexp = (as_int2(y).hi >> 20) & 0x7ff; - - int cond2 = (xexp < 1021) & (yexp < 1021); - int diffexp = yexp - xexp; - - // Scale up both x and y if they are both below 1/4 - double x1 = ldexp(x, 1024); - int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff; - double y1 = ldexp(y, 1024); - int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff; - int diffexp1 = yexp1 - xexp1; - - diffexp = cond2 ? diffexp1 : diffexp; - x = cond2 ? x1 : x; - y = cond2 ? y1 : y; - - // General case: take absolute values of arguments - double u = fabs(x); - double v = fabs(y); - - // Swap u and v if necessary to obtain 0 < v < u. Compute v/u. 
- int swap_vu = u < v; - double uu = u; - u = swap_vu ? v : u; - v = swap_vu ? uu : v; - - double vbyu = v / u; - double q1, q2; - - // General values of v/u. Use a look-up table and series expansion. - - { - double val = vbyu > 0.0625 ? vbyu : 0.063; - int index = convert_int(fma(256.0, val, 0.5)); - double2 tv = USE_TABLE(atan_jby256_tbl, index - 16); - q1 = tv.s0; - q2 = tv.s1; - double c = (double)index * 0x1.0p-8; - - // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1 - // u_exponent could be EMAX so we have to do it in 2 steps - int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); - //double um = __amdil_ldexp_f64(u, m); - //double vm = __amdil_ldexp_f64(v, m); - double um = ldexp(u, m); - double vm = ldexp(v, m); - - // 26 leading bits of u - double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL); - double u2 = um - u1; - - double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um)); - - // Polynomial approximation to atan(r) - double s = r * r; - q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r); - } - - - double q3, q4; - { - q3 = 0.0; - q4 = vbyu; - } - - double q5, q6; - { - double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL); - double u2 = u - u1; - double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL); - double vu2 = vbyu - vu1; - - q5 = 0.0; - double s = vbyu * vbyu; - q6 = vbyu + fma(-vbyu * s, - fma(-s, - fma(-s, - fma(-s, - fma(-s, 0.90029810285449784439E-01, - 0.11110736283514525407), - 0.14285713561807169030), - 0.19999999999393223405), - 0.33333333333333170500), - MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u)); - } - - - q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5; - q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6; - - q1 = vbyu > 0.0625 ? q1 : q3; - q2 = vbyu > 0.0625 ? q2 : q4; - - // Tidy-up according to which quadrant the arguments lie in - double res1, res2, res3, res4; - q1 = swap_vu ? piby2_head - q1 : q1; - q2 = swap_vu ? 
piby2_tail - q2 : q2; - q1 = xneg ? pi_head - q1 : q1; - q2 = xneg ? pi_tail - q2 : q2; - q1 = q1 + q2; - res4 = yneg ? -q1 : q1; - - res1 = yneg ? -three_piby4 : three_piby4; - res2 = yneg ? -piby4 : piby4; - res3 = xneg ? res1 : res2; - - res3 = isinf(x2) & isinf(y2) ? res3 : res4; - res1 = yneg ? -pi : pi; - - // abs(x)/abs(y) > 2^56 and x < 0 - res3 = (diffexp < -56 && xneg) ? res1 : res3; - - res4 = MATH_DIVIDE(y, x); - // x positive and dominant over y by a factor of 2^28 - res3 = diffexp < -28 & xneg == 0 ? res4 : res3; - - // abs(y)/abs(x) > 2^56 - res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2 - res3 = diffexp > 56 ? res4 : res3; - - res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y - res4 = xneg ? res1 : y2; - - res3 = y2 == 0.0 ? res4 : res3; // Zero y gives +-0 for positive x and +-pi for negative x - res3 = isnan(y2) ? y2 : res3; - res3 = isnan(x2) ? x2 : res3; - - return res3; -} - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double); - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2) - -#endif +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl index 667e4519d8043..316db1d6c9c48 100644 --- a/libclc/generic/lib/math/atan2pi.cl +++ b/libclc/generic/lib/math/atan2pi.cl @@ -8,207 +8,9 @@ #include <clc/clc.h> #include <clc/clcmacro.h> -#include <clc/math/math.h> -#include <clc/math/tables.h> +#include <clc/math/clc_atan2pi.h> -_CLC_OVERLOAD _CLC_DEF float atan2pi(float y, float x) { - const float pi = 0x1.921fb6p+1f; +#define FUNCTION atan2pi +#define __CLC_BODY <clc/shared/binary_def.inc> - float ax = fabs(x); - float ay = fabs(y); - float v = min(ax, ay); - float u = max(ax, ay); - - // Scale since u could be large, as in "regular" divide - float s = u > 0x1.0p+96f ? 
0x1.0p-32f : 1.0f; - float vbyu = s * MATH_DIVIDE(v, s*u); - - float vbyu2 = vbyu * vbyu; - - float p = mad(vbyu2, mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * vbyu2 * vbyu; - float q = mad(vbyu2, mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f); - - // Octant 0 result - float a = MATH_DIVIDE(mad(p, MATH_RECIP(q), vbyu), pi); - - // Fix up 3 other octants - float at = 0.5f - a; - a = ay > ax ? at : a; - at = 1.0f - a; - a = x < 0.0F ? at : a; - - // y == 0 => 0 for x >= 0, pi for x < 0 - at = as_int(x) < 0 ? 1.0f : 0.0f; - a = y == 0.0f ? at : a; - - // if (!FINITE_ONLY()) { - // x and y are +- Inf - at = x > 0.0f ? 0.25f : 0.75f; - a = ax == INFINITY & ay == INFINITY ? at : a; - - // x or y is NaN - a = isnan(x) | isnan(y) ? as_float(QNANBITPATT_SP32) : a; - // } - - // Fixup sign and return - return copysign(a, y); -} - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2pi, float, float) - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) { - const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */ - const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */ - const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */ - const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */ - const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */ - - double x2 = x; - int xneg = as_int2(x).hi < 0; - int xexp = (as_int2(x).hi >> 20) & 0x7ff; - - double y2 = y; - int yneg = as_int2(y).hi < 0; - int yexp = (as_int2(y).hi >> 20) & 0x7ff; - - int cond2 = (xexp < 1021) & (yexp < 1021); - int diffexp = yexp - xexp; - - // Scale up both x and y if they are both below 1/4 - double x1 = ldexp(x, 1024); - int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff; - double y1 = ldexp(y, 1024); - int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff; - int diffexp1 = yexp1 - xexp1; - - diffexp = cond2 ? 
diffexp1 : diffexp; - x = cond2 ? x1 : x; - y = cond2 ? y1 : y; - - // General case: take absolute values of arguments - double u = fabs(x); - double v = fabs(y); - - // Swap u and v if necessary to obtain 0 < v < u. Compute v/u. - int swap_vu = u < v; - double uu = u; - u = swap_vu ? v : u; - v = swap_vu ? uu : v; - - double vbyu = v / u; - double q1, q2; - - // General values of v/u. Use a look-up table and series expansion. - - { - double val = vbyu > 0.0625 ? vbyu : 0.063; - int index = convert_int(fma(256.0, val, 0.5)); - double2 tv = USE_TABLE(atan_jby256_tbl, (index - 16)); - q1 = tv.s0; - q2 = tv.s1; - double c = (double)index * 0x1.0p-8; - - // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1 - // u_exponent could be EMAX so we have to do it in 2 steps - int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); - double um = ldexp(u, m); - double vm = ldexp(v, m); - - // 26 leading bits of u - double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL); - double u2 = um - u1; - - double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um)); - - // Polynomial approximation to atan(r) - double s = r * r; - q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r); - } - - - double q3, q4; - { - q3 = 0.0; - q4 = vbyu; - } - - double q5, q6; - { - double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL); - double u2 = u - u1; - double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL); - double vu2 = vbyu - vu1; - - q5 = 0.0; - double s = vbyu * vbyu; - q6 = vbyu + fma(-vbyu * s, - fma(-s, - fma(-s, - fma(-s, - fma(-s, 0.90029810285449784439E-01, - 0.11110736283514525407), - 0.14285713561807169030), - 0.19999999999393223405), - 0.33333333333333170500), - MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u)); - } - - - q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5; - q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6; - - q1 = vbyu > 0.0625 ? q1 : q3; - q2 = vbyu > 0.0625 ? 
q2 : q4; - - // Tidy-up according to which quadrant the arguments lie in - double res1, res2, res3, res4; - q1 = swap_vu ? piby2_head - q1 : q1; - q2 = swap_vu ? piby2_tail - q2 : q2; - q1 = xneg ? pi_head - q1 : q1; - q2 = xneg ? pi_tail - q2 : q2; - q1 = MATH_DIVIDE(q1 + q2, pi); - res4 = yneg ? -q1 : q1; - - res1 = yneg ? -0.75 : 0.75; - res2 = yneg ? -0.25 : 0.25; - res3 = xneg ? res1 : res2; - - res3 = isinf(y2) & isinf(x2) ? res3 : res4; - res1 = yneg ? -1.0 : 1.0; - - // abs(x)/abs(y) > 2^56 and x < 0 - res3 = (diffexp < -56 && xneg) ? res1 : res3; - - res4 = MATH_DIVIDE(MATH_DIVIDE(y, x), pi); - // x positive and dominant over y by a factor of 2^28 - res3 = diffexp < -28 & xneg == 0 ? res4 : res3; - - // abs(y)/abs(x) > 2^56 - res4 = yneg ? -0.5 : 0.5; // atan(y/x) is insignificant compared to piby2 - res3 = diffexp > 56 ? res4 : res3; - - res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y - res4 = xneg ? res1 : y2; - - res3 = y2 == 0.0 ? res4 : res3; // Zero y gives +-0 for positive x and +-pi for negative x - res3 = isnan(y2) ? y2 : res3; - res3 = isnan(x2) ? x2 : res3; - - return res3; -} - - -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi) - -#endif +#include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl index d7d3ba4aafec9..c03b4d1a3c18a 100644 --- a/libclc/generic/lib/math/tables.cl +++ b/libclc/generic/lib/math/tables.cl @@ -745,258 +745,6 @@ TABLE_FUNCTION(float2, EXP_TBL_EP, exp_tbl_ep); #ifdef cl_khr_fp64 - - -// Arrays atan_jby256_lead and atan_jby256_tail contain -// leading and trailing parts respectively of precomputed -// values of atan(j/256), for j = 16, 17, ..., 256. -// atan_jby256_lead contains the first 21 bits of precision, -// and atan_jby256_tail contains a further 53 bits precision. 
- -DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = { - (double2)(0x1.ff55b00000000p-5, 0x1.6e59fbd38db2cp-26), - (double2)(0x1.0f99e00000000p-4, 0x1.4e3aa54dedf96p-25), - (double2)(0x1.1f86d00000000p-4, 0x1.7e105ab1bda88p-25), - (double2)(0x1.2f71900000000p-4, 0x1.8c5254d013fd0p-27), - (double2)(0x1.3f59f00000000p-4, 0x1.cf8ab3ad62670p-29), - (double2)(0x1.4f3fd00000000p-4, 0x1.9dca4bec80468p-26), - (double2)(0x1.5f23200000000p-4, 0x1.3f4b5ec98a8dap-26), - (double2)(0x1.6f03b00000000p-4, 0x1.b9d49619d81fep-25), - (double2)(0x1.7ee1800000000p-4, 0x1.3017887460934p-27), - (double2)(0x1.8ebc500000000p-4, 0x1.11e3eca0b9944p-26), - (double2)(0x1.9e94100000000p-4, 0x1.4f3f73c5a332ep-26), - (double2)(0x1.ae68a00000000p-4, 0x1.c71c8ae0e00a6p-26), - (double2)(0x1.be39e00000000p-4, 0x1.7cde0f86fbdc7p-25), - (double2)(0x1.ce07c00000000p-4, 0x1.70f328c889c72p-26), - (double2)(0x1.ddd2100000000p-4, 0x1.c07ae9b994efep-26), - (double2)(0x1.ed98c00000000p-4, 0x1.0c8021d7b1698p-27), - (double2)(0x1.fd5ba00000000p-4, 0x1.35585edb8cb22p-25), - (double2)(0x1.068d500000000p-3, 0x1.0842567b30e96p-24), - (double2)(0x1.0e6ad00000000p-3, 0x1.99e811031472ep-24), - (double2)(0x1.1646500000000p-3, 0x1.041821416bceep-25), - (double2)(0x1.1e1fa00000000p-3, 0x1.f6086e4dc96f4p-24), - (double2)(0x1.25f6e00000000p-3, 0x1.71a535c5f1b58p-27), - (double2)(0x1.2dcbd00000000p-3, 0x1.65f743fe63ca1p-24), - (double2)(0x1.359e800000000p-3, 0x1.dbd733472d014p-24), - (double2)(0x1.3d6ee00000000p-3, 0x1.d18cc4d8b0d1dp-24), - (double2)(0x1.453ce00000000p-3, 0x1.8c12553c8fb29p-24), - (double2)(0x1.4d08700000000p-3, 0x1.53b49e2e8f991p-24), - (double2)(0x1.54d1800000000p-3, 0x1.7422ae148c141p-24), - (double2)(0x1.5c98100000000p-3, 0x1.e3ec269df56a8p-27), - (double2)(0x1.645bf00000000p-3, 0x1.ff6754e7e0ac9p-24), - (double2)(0x1.6c1d400000000p-3, 0x1.131267b1b5aadp-24), - (double2)(0x1.73dbd00000000p-3, 0x1.d14fa403a94bcp-24), - (double2)(0x1.7b97b00000000p-3, 0x1.2f396c089a3d8p-25), - (double2)(0x1.8350b00000000p-3, 
0x1.c731d78fa95bbp-24), - (double2)(0x1.8b06e00000000p-3, 0x1.c50f385177399p-24), - (double2)(0x1.92ba300000000p-3, 0x1.f41409c6f2c20p-25), - (double2)(0x1.9a6a800000000p-3, 0x1.d2d90c4c39ec0p-24), - (double2)(0x1.a217e00000000p-3, 0x1.80420696f2106p-25), - (double2)(0x1.a9c2300000000p-3, 0x1.b40327943a2e8p-27), - (double2)(0x1.b169600000000p-3, 0x1.5d35e02f3d2a2p-25), - (double2)(0x1.b90d700000000p-3, 0x1.4a498288117b0p-25), - (double2)(0x1.c0ae500000000p-3, 0x1.35da119afb324p-25), - (double2)(0x1.c84bf00000000p-3, 0x1.14e85cdb9a908p-24), - (double2)(0x1.cfe6500000000p-3, 0x1.38754e5547b9ap-25), - (double2)(0x1.d77d500000000p-3, 0x1.be40ae6ce3246p-24), - (double2)(0x1.df11000000000p-3, 0x1.0c993b3bea7e7p-24), - (double2)(0x1.e6a1400000000p-3, 0x1.1d2dd89ac3359p-24), - (double2)(0x1.ee2e100000000p-3, 0x1.1476603332c46p-25), - (double2)(0x1.f5b7500000000p-3, 0x1.f25901bac55b7p-24), - (double2)(0x1.fd3d100000000p-3, 0x1.f881b7c826e28p-24), - (double2)(0x1.025fa00000000p-2, 0x1.441996d698d20p-24), - (double2)(0x1.061ee00000000p-2, 0x1.407ac521ea089p-23), - (double2)(0x1.09dc500000000p-2, 0x1.2fb0c6c4b1723p-23), - (double2)(0x1.0d97e00000000p-2, 0x1.ca135966a3e18p-23), - (double2)(0x1.1151a00000000p-2, 0x1.b1218e4d646e4p-25), - (double2)(0x1.1509700000000p-2, 0x1.d4e72a350d288p-25), - (double2)(0x1.18bf500000000p-2, 0x1.4617e2f04c329p-23), - (double2)(0x1.1c73500000000p-2, 0x1.096ec41e82650p-25), - (double2)(0x1.2025500000000p-2, 0x1.9f91f25773e6ep-24), - (double2)(0x1.23d5600000000p-2, 0x1.59c0820f1d674p-25), - (double2)(0x1.2783700000000p-2, 0x1.02bf7a2df1064p-25), - (double2)(0x1.2b2f700000000p-2, 0x1.fb36bfc40508fp-23), - (double2)(0x1.2ed9800000000p-2, 0x1.ea08f3f8dc892p-24), - (double2)(0x1.3281800000000p-2, 0x1.3ed6254656a0ep-24), - (double2)(0x1.3627700000000p-2, 0x1.b83f5e5e69c58p-25), - (double2)(0x1.39cb400000000p-2, 0x1.d6ec2af768592p-23), - (double2)(0x1.3d6d100000000p-2, 0x1.493889a226f94p-25), - (double2)(0x1.410cb00000000p-2, 0x1.5ad8fa65279bap-23), - 
(double2)(0x1.44aa400000000p-2, 0x1.b615784d45434p-25), - (double2)(0x1.4845a00000000p-2, 0x1.09a184368f145p-23), - (double2)(0x1.4bdee00000000p-2, 0x1.61a2439b0d91cp-24), - (double2)(0x1.4f75f00000000p-2, 0x1.ce1a65e39a978p-24), - (double2)(0x1.530ad00000000p-2, 0x1.32a39a93b6a66p-23), - (double2)(0x1.569d800000000p-2, 0x1.1c3699af804e7p-23), - (double2)(0x1.5a2e000000000p-2, 0x1.75e0f4e44ede8p-26), - (double2)(0x1.5dbc300000000p-2, 0x1.f77ced1a7a83bp-23), - (double2)(0x1.6148400000000p-2, 0x1.84e7f0cb1b500p-29), - (double2)(0x1.64d1f00000000p-2, 0x1.ec6b838b02dfep-23), - (double2)(0x1.6859700000000p-2, 0x1.3ebf4dfbeda87p-23), - (double2)(0x1.6bdea00000000p-2, 0x1.9397aed9cb475p-23), - (double2)(0x1.6f61900000000p-2, 0x1.07937bc239c54p-24), - (double2)(0x1.72e2200000000p-2, 0x1.aa754553131b6p-23), - (double2)(0x1.7660700000000p-2, 0x1.4a05d407c45dcp-24), - (double2)(0x1.79dc600000000p-2, 0x1.132231a206dd0p-23), - (double2)(0x1.7d56000000000p-2, 0x1.2d8ecfdd69c88p-24), - (double2)(0x1.80cd400000000p-2, 0x1.a852c74218606p-24), - (double2)(0x1.8442200000000p-2, 0x1.71bf2baeebb50p-23), - (double2)(0x1.87b4b00000000p-2, 0x1.83d7db7491820p-27), - (double2)(0x1.8b24d00000000p-2, 0x1.ca50d92b6da14p-25), - (double2)(0x1.8e92900000000p-2, 0x1.6f5cde8530298p-26), - (double2)(0x1.91fde00000000p-2, 0x1.f343198910740p-24), - (double2)(0x1.9566d00000000p-2, 0x1.0e8d241ccd80ap-24), - (double2)(0x1.98cd500000000p-2, 0x1.1535ac619e6c8p-24), - (double2)(0x1.9c31600000000p-2, 0x1.7316041c36cd2p-24), - (double2)(0x1.9f93000000000p-2, 0x1.985a000637d8ep-24), - (double2)(0x1.a2f2300000000p-2, 0x1.f2f29858c0a68p-25), - (double2)(0x1.a64ee00000000p-2, 0x1.879847f96d909p-23), - (double2)(0x1.a9a9200000000p-2, 0x1.ab3d319e12e42p-23), - (double2)(0x1.ad00f00000000p-2, 0x1.5088162dfc4c2p-24), - (double2)(0x1.b056400000000p-2, 0x1.05749a1cd9d8cp-25), - (double2)(0x1.b3a9100000000p-2, 0x1.da65c6c6b8618p-26), - (double2)(0x1.b6f9600000000p-2, 0x1.739bf7df1ad64p-25), - 
(double2)(0x1.ba47300000000p-2, 0x1.bc31252aa3340p-25), - (double2)(0x1.bd92800000000p-2, 0x1.e528191ad3aa8p-26), - (double2)(0x1.c0db400000000p-2, 0x1.929d93df19f18p-23), - (double2)(0x1.c421900000000p-2, 0x1.ff11eb693a080p-26), - (double2)(0x1.c765500000000p-2, 0x1.55ae3f145a3a0p-27), - (double2)(0x1.caa6800000000p-2, 0x1.cbcd8c6c0ca82p-24), - (double2)(0x1.cde5300000000p-2, 0x1.0cb04d425d304p-24), - (double2)(0x1.d121500000000p-2, 0x1.9adfcab5be678p-24), - (double2)(0x1.d45ae00000000p-2, 0x1.93d90c5662508p-23), - (double2)(0x1.d791f00000000p-2, 0x1.68489bd35ff40p-24), - (double2)(0x1.dac6700000000p-2, 0x1.586ed3da2b7e0p-28), - (double2)(0x1.ddf8500000000p-2, 0x1.7604d2e850eeep-23), - (double2)(0x1.e127b00000000p-2, 0x1.ac1d12bfb53d8p-24), - (double2)(0x1.e454800000000p-2, 0x1.9b3d468274740p-28), - (double2)(0x1.e77eb00000000p-2, 0x1.fc5d68d10e53cp-24), - (double2)(0x1.eaa6500000000p-2, 0x1.8f9e51884becbp-23), - (double2)(0x1.edcb600000000p-2, 0x1.a87f0869c06d1p-23), - (double2)(0x1.f0ede00000000p-2, 0x1.31e7279f685fap-23), - (double2)(0x1.f40dd00000000p-2, 0x1.6a8282f9719b0p-27), - (double2)(0x1.f72b200000000p-2, 0x1.0d2724a8a44e0p-25), - (double2)(0x1.fa45d00000000p-2, 0x1.a60524b11ad4ep-23), - (double2)(0x1.fd5e000000000p-2, 0x1.75fdf832750f0p-26), - (double2)(0x1.0039c00000000p-1, 0x1.cf06902e4cd36p-23), - (double2)(0x1.01c3400000000p-1, 0x1.e82422d4f6d10p-25), - (double2)(0x1.034b700000000p-1, 0x1.24a091063e6c0p-26), - (double2)(0x1.04d2500000000p-1, 0x1.8a1a172dc6f38p-24), - (double2)(0x1.0657e00000000p-1, 0x1.29b6619f8a92dp-22), - (double2)(0x1.07dc300000000p-1, 0x1.9274d9c1b70c8p-24), - (double2)(0x1.095f300000000p-1, 0x1.0c34b1fbb7930p-26), - (double2)(0x1.0ae0e00000000p-1, 0x1.639866c20eb50p-25), - (double2)(0x1.0c61400000000p-1, 0x1.6d6d0f6832e9ep-23), - (double2)(0x1.0de0500000000p-1, 0x1.af54def99f25ep-22), - (double2)(0x1.0f5e200000000p-1, 0x1.16cfc52a00262p-22), - (double2)(0x1.10daa00000000p-1, 0x1.dcc1e83569c32p-23), - 
(double2)(0x1.1255d00000000p-1, 0x1.37f7a551ed425p-22), - (double2)(0x1.13cfb00000000p-1, 0x1.f6360adc98887p-22), - (double2)(0x1.1548500000000p-1, 0x1.2c6ec8d35a2c1p-22), - (double2)(0x1.16bfa00000000p-1, 0x1.bd44df84cb036p-23), - (double2)(0x1.1835a00000000p-1, 0x1.117cf826e310ep-22), - (double2)(0x1.19aa500000000p-1, 0x1.ca533f332cfc9p-22), - (double2)(0x1.1b1dc00000000p-1, 0x1.0f208509dbc2ep-22), - (double2)(0x1.1c8fe00000000p-1, 0x1.cd07d93c945dep-23), - (double2)(0x1.1e00b00000000p-1, 0x1.57bdfd67e6d72p-22), - (double2)(0x1.1f70400000000p-1, 0x1.aab89c516c658p-24), - (double2)(0x1.20de800000000p-1, 0x1.3e823b1a1b8a0p-25), - (double2)(0x1.224b700000000p-1, 0x1.307464a9d6d3cp-23), - (double2)(0x1.23b7100000000p-1, 0x1.c5993cd438843p-22), - (double2)(0x1.2521700000000p-1, 0x1.ba2fca02ab554p-22), - (double2)(0x1.268a900000000p-1, 0x1.01a5b6983a268p-23), - (double2)(0x1.27f2600000000p-1, 0x1.273d1b350efc8p-25), - (double2)(0x1.2958e00000000p-1, 0x1.64c238c37b0c6p-23), - (double2)(0x1.2abe200000000p-1, 0x1.aded07370a300p-25), - (double2)(0x1.2c22100000000p-1, 0x1.78091197eb47ep-23), - (double2)(0x1.2d84c00000000p-1, 0x1.4b0f245e0dabcp-24), - (double2)(0x1.2ee6200000000p-1, 0x1.080d9794e2eafp-22), - (double2)(0x1.3046400000000p-1, 0x1.d4ec242b60c76p-23), - (double2)(0x1.31a5200000000p-1, 0x1.221d2f940caa0p-27), - (double2)(0x1.3302b00000000p-1, 0x1.cdbc42b2bba5cp-24), - (double2)(0x1.345f000000000p-1, 0x1.cce37bb440840p-25), - (double2)(0x1.35ba000000000p-1, 0x1.6c1d999cf1dd0p-22), - (double2)(0x1.3713d00000000p-1, 0x1.bed8a07eb0870p-26), - (double2)(0x1.386c500000000p-1, 0x1.69ed88f490e3cp-24), - (double2)(0x1.39c3900000000p-1, 0x1.cd41719b73ef0p-25), - (double2)(0x1.3b19800000000p-1, 0x1.cbc4ac95b41b7p-22), - (double2)(0x1.3c6e400000000p-1, 0x1.238f1b890f5d7p-22), - (double2)(0x1.3dc1c00000000p-1, 0x1.50c4282259cc4p-24), - (double2)(0x1.3f13f00000000p-1, 0x1.713d2de87b3e2p-22), - (double2)(0x1.4064f00000000p-1, 0x1.1d5a7d2255276p-23), - 
(double2)(0x1.41b4a00000000p-1, 0x1.c0dfd48227ac1p-22), - (double2)(0x1.4303200000000p-1, 0x1.1c964dab76753p-22), - (double2)(0x1.4450600000000p-1, 0x1.6de56d5704496p-23), - (double2)(0x1.459c600000000p-1, 0x1.4aeb71fd19968p-23), - (double2)(0x1.46e7200000000p-1, 0x1.fbf91c57b1918p-23), - (double2)(0x1.4830a00000000p-1, 0x1.d6bef7fbe5d9ap-22), - (double2)(0x1.4978f00000000p-1, 0x1.464d3dc249066p-22), - (double2)(0x1.4ac0000000000p-1, 0x1.638e2ec4d9073p-22), - (double2)(0x1.4c05e00000000p-1, 0x1.16f4a7247ea7cp-24), - (double2)(0x1.4d4a800000000p-1, 0x1.1a0a740f1d440p-28), - (double2)(0x1.4e8de00000000p-1, 0x1.6edbb0114a33cp-23), - (double2)(0x1.4fd0100000000p-1, 0x1.dbee8bf1d513cp-24), - (double2)(0x1.5111000000000p-1, 0x1.5b8bdb0248f73p-22), - (double2)(0x1.5250c00000000p-1, 0x1.7de3d3f5eac64p-22), - (double2)(0x1.538f500000000p-1, 0x1.ee24187ae448ap-23), - (double2)(0x1.54cca00000000p-1, 0x1.e06c591ec5192p-22), - (double2)(0x1.5608d00000000p-1, 0x1.4e3861a332738p-24), - (double2)(0x1.5743c00000000p-1, 0x1.a9599dcc2bfe4p-24), - (double2)(0x1.587d800000000p-1, 0x1.f732fbad43468p-25), - (double2)(0x1.59b6000000000p-1, 0x1.eb9f573b727d9p-22), - (double2)(0x1.5aed600000000p-1, 0x1.8b212a2eb9897p-22), - (double2)(0x1.5c23900000000p-1, 0x1.384884c167215p-22), - (double2)(0x1.5d58900000000p-1, 0x1.0e2d363020051p-22), - (double2)(0x1.5e8c600000000p-1, 0x1.2820879fbd022p-22), - (double2)(0x1.5fbf000000000p-1, 0x1.a1ab9893e4b30p-22), - (double2)(0x1.60f0800000000p-1, 0x1.2d1b817a24478p-23), - (double2)(0x1.6220d00000000p-1, 0x1.15d7b8ded4878p-25), - (double2)(0x1.634ff00000000p-1, 0x1.8968f9db3a5e4p-24), - (double2)(0x1.647de00000000p-1, 0x1.71c4171fe135fp-22), - (double2)(0x1.65aab00000000p-1, 0x1.6d80f605d0d8cp-22), - (double2)(0x1.66d6600000000p-1, 0x1.c91f043691590p-24), - (double2)(0x1.6800e00000000p-1, 0x1.39f8a15fce2b2p-23), - (double2)(0x1.692a400000000p-1, 0x1.55beda9d94b80p-27), - (double2)(0x1.6a52700000000p-1, 0x1.b12c15d60949ap-23), - 
(double2)(0x1.6b79800000000p-1, 0x1.24167b312bfe3p-22), - (double2)(0x1.6c9f700000000p-1, 0x1.0ab8633070277p-22), - (double2)(0x1.6dc4400000000p-1, 0x1.54554ebbc80eep-23), - (double2)(0x1.6ee7f00000000p-1, 0x1.0204aef5a4bb8p-25), - (double2)(0x1.700a700000000p-1, 0x1.8af08c679cf2cp-22), - (double2)(0x1.712be00000000p-1, 0x1.0852a330ae6c8p-22), - (double2)(0x1.724c300000000p-1, 0x1.6d3eb9ec32916p-23), - (double2)(0x1.736b600000000p-1, 0x1.685cb7fcbbafep-23), - (double2)(0x1.7489700000000p-1, 0x1.1f751c1e0bd95p-22), - (double2)(0x1.75a6700000000p-1, 0x1.705b1b0f72560p-26), - (double2)(0x1.76c2400000000p-1, 0x1.b98d8d808ca92p-22), - (double2)(0x1.77dd100000000p-1, 0x1.2ea22c75cc980p-25), - (double2)(0x1.78f6b00000000p-1, 0x1.7aba62bca0350p-22), - (double2)(0x1.7a0f400000000p-1, 0x1.d73833442278cp-22), - (double2)(0x1.7b26c00000000p-1, 0x1.5a5ca1fb18bf9p-22), - (double2)(0x1.7c3d300000000p-1, 0x1.1a6092b6ecf28p-25), - (double2)(0x1.7d52800000000p-1, 0x1.44fd049aac104p-24), - (double2)(0x1.7e66c00000000p-1, 0x1.c114fd8df5180p-29), - (double2)(0x1.7f79e00000000p-1, 0x1.5972f130feae5p-22), - (double2)(0x1.808c000000000p-1, 0x1.ca034a55fe198p-24), - (double2)(0x1.819d000000000p-1, 0x1.6e2b149990227p-22), - (double2)(0x1.82ad000000000p-1, 0x1.b00000294592cp-24), - (double2)(0x1.83bbe00000000p-1, 0x1.8b9bdc442620ep-22), - (double2)(0x1.84c9c00000000p-1, 0x1.d94fdfabf3e4ep-23), - (double2)(0x1.85d6900000000p-1, 0x1.5db30b145ad9ap-23), - (double2)(0x1.86e2500000000p-1, 0x1.e3e1eb95022b0p-23), - (double2)(0x1.87ed000000000p-1, 0x1.d5b8b45442bd6p-22), - (double2)(0x1.88f6b00000000p-1, 0x1.7a046231ecd2ep-22), - (double2)(0x1.89ff500000000p-1, 0x1.feafe3ef55232p-22), - (double2)(0x1.8b06f00000000p-1, 0x1.839e7bfd78267p-22), - (double2)(0x1.8c0d900000000p-1, 0x1.45cf49d6fa900p-25), - (double2)(0x1.8d13200000000p-1, 0x1.be3132b27f380p-27), - (double2)(0x1.8e17a00000000p-1, 0x1.533980bb84f9fp-22), - (double2)(0x1.8f1b300000000p-1, 0x1.889e2ce3ba390p-26), - 
(double2)(0x1.901db00000000p-1, 0x1.f7778c3ad0cc8p-24), - (double2)(0x1.911f300000000p-1, 0x1.46660cec4eba2p-23), - (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23), -}; - DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = { (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0), (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25), @@ -2197,7 +1945,6 @@ DECLARE_TABLE(double2, LOG_F_INV_TBL, 258) = { (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0), }; -TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl); TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl); TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl); TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits